xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 9f3bdf3d039430b5172c424def20ce5d9f7367d4)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/ioctl.h>
22 #include <sys/resource.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 
26 #include <fs_attr.h>
27 #include <fs_info.h>
28 #include <fs_interface.h>
29 #include <fs_volume.h>
30 #include <NodeMonitor.h>
31 #include <OS.h>
32 #include <StorageDefs.h>
33 
34 #include <AutoDeleter.h>
35 #include <AutoDeleterDrivers.h>
36 #include <block_cache.h>
37 #include <boot/kernel_args.h>
38 #include <debug_heap.h>
39 #include <disk_device_manager/KDiskDevice.h>
40 #include <disk_device_manager/KDiskDeviceManager.h>
41 #include <disk_device_manager/KDiskDeviceUtils.h>
42 #include <disk_device_manager/KDiskSystem.h>
43 #include <fd.h>
44 #include <file_cache.h>
45 #include <fs/node_monitor.h>
46 #include <KPath.h>
47 #include <lock.h>
48 #include <low_resource_manager.h>
49 #include <slab/Slab.h>
50 #include <StackOrHeapArray.h>
51 #include <syscalls.h>
52 #include <syscall_restart.h>
53 #include <tracing.h>
54 #include <util/atomic.h>
55 #include <util/AutoLock.h>
56 #include <util/ThreadAutoLock.h>
57 #include <util/DoublyLinkedList.h>
58 #include <vfs.h>
59 #include <vm/vm.h>
60 #include <vm/VMCache.h>
61 #include <wait_for_objects.h>
62 
63 #include "EntryCache.h"
64 #include "fifo.h"
65 #include "IORequest.h"
66 #include "unused_vnodes.h"
67 #include "vfs_tracing.h"
68 #include "Vnode.h"
69 #include "../cache/vnode_store.h"
70 
71 
72 //#define TRACE_VFS
73 #ifdef TRACE_VFS
74 #	define TRACE(x) dprintf x
75 #	define FUNCTION(x) dprintf x
76 #else
77 #	define TRACE(x) ;
78 #	define FUNCTION(x) ;
79 #endif
80 
81 #define ADD_DEBUGGER_COMMANDS
82 
83 
84 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
85 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
86 
87 #if KDEBUG
88 #	define FS_CALL(vnode, op, params...) \
89 		( HAS_FS_CALL(vnode, op) ? \
90 			vnode->ops->op(vnode->mount->volume, vnode, params) \
91 			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
92 #	define FS_CALL_NO_PARAMS(vnode, op) \
93 		( HAS_FS_CALL(vnode, op) ? \
94 			vnode->ops->op(vnode->mount->volume, vnode) \
95 			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
96 #	define FS_MOUNT_CALL(mount, op, params...) \
97 		( HAS_FS_MOUNT_CALL(mount, op) ? \
98 			mount->volume->ops->op(mount->volume, params) \
99 			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
100 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
101 		( HAS_FS_MOUNT_CALL(mount, op) ? \
102 			mount->volume->ops->op(mount->volume) \
103 			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
104 #else
105 #	define FS_CALL(vnode, op, params...) \
106 			vnode->ops->op(vnode->mount->volume, vnode, params)
107 #	define FS_CALL_NO_PARAMS(vnode, op) \
108 			vnode->ops->op(vnode->mount->volume, vnode)
109 #	define FS_MOUNT_CALL(mount, op, params...) \
110 			mount->volume->ops->op(mount->volume, params)
111 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
112 			mount->volume->ops->op(mount->volume)
113 #endif
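
// A minimal illustrative sketch (not part of the original source) of how
// these macros are used together; normalize_flock() below follows the same
// pattern:
#if 0
	struct stat stat;
	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;
	status_t status = FS_CALL(vnode, read_stat, &stat);
#endif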
114 
115 
116 const static size_t kMaxPathLength = 65536;
117 	// The absolute maximum path length (for getcwd()); this does not depend
118 	// on PATH_MAX.
119 
120 
121 typedef DoublyLinkedList<vnode> VnodeList;
122 
123 /*!	\brief Structure to manage a mounted file system
124 
125 	Note: The root_vnode and root_vnode->covers fields (what others?) are
126 	initialized in fs_mount() and not changed afterwards. That is, as soon
127 	as the mount is mounted and it is ensured that it won't be unmounted
128 	(e.g. by holding a reference to a vnode of that mount), (read) access
129 	to those fields is always safe, even without additional locking. Moreover,
130 	while mounted the mount holds a reference to the root_vnode->covers vnode,
131 	thus making the access path vnode->mount->root_vnode->covers->mount->...
132 	safe if a reference to vnode is held (note that for the root mount
133 	root_vnode->covers is NULL, though).
134 */
135 struct fs_mount {
136 	fs_mount()
137 		:
138 		volume(NULL),
139 		device_name(NULL)
140 	{
141 		mutex_init(&lock, "mount lock");
142 	}
143 
144 	~fs_mount()
145 	{
146 		mutex_destroy(&lock);
147 		free(device_name);
148 
149 		while (volume) {
150 			fs_volume* superVolume = volume->super_volume;
151 
152 			if (volume->file_system != NULL)
153 				put_module(volume->file_system->info.name);
154 
155 			free(volume->file_system_name);
156 			free(volume);
157 			volume = superVolume;
158 		}
159 	}
160 
161 	struct fs_mount* next;
162 	dev_t			id;
163 	fs_volume*		volume;
164 	char*			device_name;
165 	mutex			lock;	// guards the vnodes list
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 
176 namespace {
177 
178 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
179 	list_link		link;
180 	void*			bound_to;
181 	team_id			team;
182 	pid_t			session;
183 	off_t			start;
184 	off_t			end;
185 	bool			shared;
186 };
187 
188 typedef DoublyLinkedList<advisory_lock> LockList;
189 
190 } // namespace
191 
192 
193 struct advisory_locking {
194 	sem_id			lock;
195 	sem_id			wait_sem;
196 	LockList		locks;
197 
198 	advisory_locking()
199 		:
200 		lock(-1),
201 		wait_sem(-1)
202 	{
203 	}
204 
205 	~advisory_locking()
206 	{
207 		if (lock >= 0)
208 			delete_sem(lock);
209 		if (wait_sem >= 0)
210 			delete_sem(wait_sem);
211 	}
212 };
213 
214 /*!	\brief Guards sMountsTable.
215 
216 	The holder is allowed read/write access to sMountsTable.
217 	Manipulating the fs_mount structures themselves
218 	(and destroying them) requires different locks, though.
219 */
220 static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");
221 
222 /*!	\brief Guards mount/unmount operations.
223 
224 	fs_mount() and fs_unmount() hold the lock during their whole operation.
225 	That is, locking the lock ensures that no FS is mounted/unmounted. In
226 	particular this means that
227 	- sMountsTable will not be modified,
228 	- the fields of the fs_mount structures in sMountsTable that are immutable
229 	  after initialization will not be modified.
230 
231 	The thread trying to lock the lock must not hold sVnodeLock or
232 	sMountLock.
233 */
234 static recursive_lock sMountOpLock;
235 
236 /*!	\brief Guards sVnodeTable.
237 
238 	The holder is allowed read/write access to sVnodeTable and to
239 	any unbusy vnode in that table, save for the immutable fields (device, id,
240 	private_node, mount), to which only read-only access is allowed.
241 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
242 	well as the busy, removed, and unused flags and the vnode's type can also be
243 	write accessed when holding a read lock to sVnodeLock *and* having the vnode
244 	locked. Write access to covered_by and covers requires write locking
245 	sVnodeLock.
246 
247 	The thread trying to acquire the lock must not hold sMountLock.
248 	You must not hold this lock when calling create_sem(), as this might call
249 	vfs_free_unused_vnodes() and thus cause a deadlock.
250 */
251 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
252 
253 /*!	\brief Guards io_context::root.
254 
255 	Must be held when setting or getting the io_context::root field.
256 	The only operation allowed while holding this lock besides getting or
257 	setting the field is inc_vnode_ref_count() on io_context::root.
258 */
259 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
260 
261 
262 namespace {
263 
264 struct vnode_hash_key {
265 	dev_t	device;
266 	ino_t	vnode;
267 };
268 
269 struct VnodeHash {
270 	typedef vnode_hash_key	KeyType;
271 	typedef	struct vnode	ValueType;
272 
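	// folds the 64-bit vnode ID into 32 bits and XORs in the mount ID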
273 #define VHASH(mountid, vnodeid) \
274 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
275 
276 	size_t HashKey(KeyType key) const
277 	{
278 		return VHASH(key.device, key.vnode);
279 	}
280 
281 	size_t Hash(ValueType* vnode) const
282 	{
283 		return VHASH(vnode->device, vnode->id);
284 	}
285 
286 #undef VHASH
287 
288 	bool Compare(KeyType key, ValueType* vnode) const
289 	{
290 		return vnode->device == key.device && vnode->id == key.vnode;
291 	}
292 
293 	ValueType*& GetLink(ValueType* value) const
294 	{
295 		return value->next;
296 	}
297 };
298 
299 typedef BOpenHashTable<VnodeHash> VnodeTable;
300 
301 
302 struct MountHash {
303 	typedef dev_t			KeyType;
304 	typedef	struct fs_mount	ValueType;
305 
306 	size_t HashKey(KeyType key) const
307 	{
308 		return key;
309 	}
310 
311 	size_t Hash(ValueType* mount) const
312 	{
313 		return mount->id;
314 	}
315 
316 	bool Compare(KeyType key, ValueType* mount) const
317 	{
318 		return mount->id == key;
319 	}
320 
321 	ValueType*& GetLink(ValueType* value) const
322 	{
323 		return value->next;
324 	}
325 };
326 
327 typedef BOpenHashTable<MountHash> MountTable;
328 
329 } // namespace
330 
331 
332 object_cache* sPathNameCache;
333 object_cache* sVnodeCache;
334 object_cache* sFileDescriptorCache;
335 
336 #define VNODE_HASH_TABLE_SIZE 1024
337 static VnodeTable* sVnodeTable;
338 static struct vnode* sRoot;
339 
340 #define MOUNTS_HASH_TABLE_SIZE 16
341 static MountTable* sMountsTable;
342 static dev_t sNextMountID = 1;
343 
344 #define MAX_TEMP_IO_VECS 8
345 
346 // How long to wait for busy vnodes (2000 retries * 5000 µs = 10 s)
347 #define BUSY_VNODE_RETRIES 2000
348 #define BUSY_VNODE_DELAY 5000
349 
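// The default file mode creation mask; octal 022 masks out the group and
// other write bits for newly created files.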
350 mode_t __gUmask = 022;
351 
352 /* function declarations */
353 
354 static void free_unused_vnodes();
355 
356 // file descriptor operation prototypes
357 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
358 	void* buffer, size_t* _bytes);
359 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
360 	const void* buffer, size_t* _bytes);
361 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
362 	int seekType);
363 static void file_free_fd(struct file_descriptor* descriptor);
364 static status_t file_close(struct file_descriptor* descriptor);
365 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
366 	struct selectsync* sync);
367 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
368 	struct selectsync* sync);
369 static status_t dir_read(struct io_context* context,
370 	struct file_descriptor* descriptor, struct dirent* buffer,
371 	size_t bufferSize, uint32* _count);
372 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
373 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
374 static status_t dir_rewind(struct file_descriptor* descriptor);
375 static void dir_free_fd(struct file_descriptor* descriptor);
376 static status_t dir_close(struct file_descriptor* descriptor);
377 static status_t attr_dir_read(struct io_context* context,
378 	struct file_descriptor* descriptor, struct dirent* buffer,
379 	size_t bufferSize, uint32* _count);
380 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
381 static void attr_dir_free_fd(struct file_descriptor* descriptor);
382 static status_t attr_dir_close(struct file_descriptor* descriptor);
383 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
384 	void* buffer, size_t* _bytes);
385 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
386 	const void* buffer, size_t* _bytes);
387 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
388 	int seekType);
389 static void attr_free_fd(struct file_descriptor* descriptor);
390 static status_t attr_close(struct file_descriptor* descriptor);
391 static status_t attr_read_stat(struct file_descriptor* descriptor,
392 	struct stat* statData);
393 static status_t attr_write_stat(struct file_descriptor* descriptor,
394 	const struct stat* stat, int statMask);
395 static status_t index_dir_read(struct io_context* context,
396 	struct file_descriptor* descriptor, struct dirent* buffer,
397 	size_t bufferSize, uint32* _count);
398 static status_t index_dir_rewind(struct file_descriptor* descriptor);
399 static void index_dir_free_fd(struct file_descriptor* descriptor);
400 static status_t index_dir_close(struct file_descriptor* descriptor);
401 static status_t query_read(struct io_context* context,
402 	struct file_descriptor* descriptor, struct dirent* buffer,
403 	size_t bufferSize, uint32* _count);
404 static status_t query_rewind(struct file_descriptor* descriptor);
405 static void query_free_fd(struct file_descriptor* descriptor);
406 static status_t query_close(struct file_descriptor* descriptor);
407 
408 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
409 	void* buffer, size_t length);
410 static status_t common_read_stat(struct file_descriptor* descriptor,
411 	struct stat* statData);
412 static status_t common_write_stat(struct file_descriptor* descriptor,
413 	const struct stat* statData, int statMask);
414 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
415 	struct stat* stat, bool kernel);
416 
417 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
418 	bool traverseLeafLink, bool kernel,
419 	VnodePutter& _vnode, ino_t* _parentID, char* leafName = NULL);
420 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
421 	size_t bufferSize, bool kernel);
422 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
423 	VnodePutter& _vnode, ino_t* _parentID, bool kernel);
424 static void inc_vnode_ref_count(struct vnode* vnode);
425 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
426 	bool reenter);
427 static inline void put_vnode(struct vnode* vnode);
428 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
429 	bool kernel);
430 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
431 
432 
433 static struct fd_ops sFileOps = {
434 	file_read,
435 	file_write,
436 	file_seek,
437 	common_ioctl,
438 	NULL,		// set_flags
439 	file_select,
440 	file_deselect,
441 	NULL,		// read_dir()
442 	NULL,		// rewind_dir()
443 	common_read_stat,
444 	common_write_stat,
445 	file_close,
446 	file_free_fd
447 };
448 
449 static struct fd_ops sDirectoryOps = {
450 	NULL,		// read()
451 	NULL,		// write()
452 	NULL,		// seek()
453 	common_ioctl,
454 	NULL,		// set_flags
455 	NULL,		// select()
456 	NULL,		// deselect()
457 	dir_read,
458 	dir_rewind,
459 	common_read_stat,
460 	common_write_stat,
461 	dir_close,
462 	dir_free_fd
463 };
464 
465 static struct fd_ops sAttributeDirectoryOps = {
466 	NULL,		// read()
467 	NULL,		// write()
468 	NULL,		// seek()
469 	common_ioctl,
470 	NULL,		// set_flags
471 	NULL,		// select()
472 	NULL,		// deselect()
473 	attr_dir_read,
474 	attr_dir_rewind,
475 	common_read_stat,
476 	common_write_stat,
477 	attr_dir_close,
478 	attr_dir_free_fd
479 };
480 
481 static struct fd_ops sAttributeOps = {
482 	attr_read,
483 	attr_write,
484 	attr_seek,
485 	common_ioctl,
486 	NULL,		// set_flags
487 	NULL,		// select()
488 	NULL,		// deselect()
489 	NULL,		// read_dir()
490 	NULL,		// rewind_dir()
491 	attr_read_stat,
492 	attr_write_stat,
493 	attr_close,
494 	attr_free_fd
495 };
496 
497 static struct fd_ops sIndexDirectoryOps = {
498 	NULL,		// read()
499 	NULL,		// write()
500 	NULL,		// seek()
501 	NULL,		// ioctl()
502 	NULL,		// set_flags
503 	NULL,		// select()
504 	NULL,		// deselect()
505 	index_dir_read,
506 	index_dir_rewind,
507 	NULL,		// read_stat()
508 	NULL,		// write_stat()
509 	index_dir_close,
510 	index_dir_free_fd
511 };
512 
513 #if 0
514 static struct fd_ops sIndexOps = {
515 	NULL,		// read()
516 	NULL,		// write()
517 	NULL,		// seek()
518 	NULL,		// ioctl()
519 	NULL,		// set_flags
520 	NULL,		// select()
521 	NULL,		// deselect()
522 	NULL,		// dir_read()
523 	NULL,		// dir_rewind()
524 	index_read_stat,	// read_stat()
525 	NULL,		// write_stat()
526 	NULL,		// dir_close()
527 	NULL		// free_fd()
528 };
529 #endif
530 
531 static struct fd_ops sQueryOps = {
532 	NULL,		// read()
533 	NULL,		// write()
534 	NULL,		// seek()
535 	NULL,		// ioctl()
536 	NULL,		// set_flags
537 	NULL,		// select()
538 	NULL,		// deselect()
539 	query_read,
540 	query_rewind,
541 	NULL,		// read_stat()
542 	NULL,		// write_stat()
543 	query_close,
544 	query_free_fd
545 };
546 
547 
548 namespace {
549 
550 class FDCloser {
551 public:
552 	FDCloser() : fFD(-1), fKernel(true) {}
553 
554 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
555 
556 	~FDCloser()
557 	{
558 		Close();
559 	}
560 
561 	void SetTo(int fd, bool kernel)
562 	{
563 		Close();
564 		fFD = fd;
565 		fKernel = kernel;
566 	}
567 
568 	void Close()
569 	{
570 		if (fFD >= 0) {
571 			if (fKernel)
572 				_kern_close(fFD);
573 			else
574 				_user_close(fFD);
575 			fFD = -1;
576 		}
577 	}
578 
579 	int Detach()
580 	{
581 		int fd = fFD;
582 		fFD = -1;
583 		return fd;
584 	}
585 
586 private:
587 	int		fFD;
588 	bool	fKernel;
589 };
590 
591 } // namespace
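
// A hypothetical usage sketch (not from the original source): an FDCloser
// closes the wrapped descriptor when it goes out of scope, unless Detach()
// hands ownership back, e.g. once an operation has succeeded:
#if 0
	FDCloser fdCloser(fd, kernel);
	if (status != B_OK)
		return status;	// fd is closed automatically here
	return fdCloser.Detach();	// success -- keep fd open for the caller
#endif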
592 
593 
594 #if VFS_PAGES_IO_TRACING
595 
596 namespace VFSPagesIOTracing {
597 
598 class PagesIOTraceEntry : public AbstractTraceEntry {
599 protected:
600 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
601 		const generic_io_vec* vecs, uint32 count, uint32 flags,
602 		generic_size_t bytesRequested, status_t status,
603 		generic_size_t bytesTransferred)
604 		:
605 		fVnode(vnode),
606 		fMountID(vnode->mount->id),
607 		fNodeID(vnode->id),
608 		fCookie(cookie),
609 		fPos(pos),
610 		fCount(count),
611 		fFlags(flags),
612 		fBytesRequested(bytesRequested),
613 		fStatus(status),
614 		fBytesTransferred(bytesTransferred)
615 	{
616 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
617 			sizeof(generic_io_vec) * count, false);
618 	}
619 
620 	void AddDump(TraceOutput& out, const char* mode)
621 	{
622 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
623 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
624 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
625 			(uint64)fBytesRequested);
626 
627 		if (fVecs != NULL) {
628 			for (uint32 i = 0; i < fCount; i++) {
629 				if (i > 0)
630 					out.Print(", ");
631 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
632 					(uint64)fVecs[i].length);
633 			}
634 		}
635 
636 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
637 			"transferred: %" B_PRIu64, fFlags, fStatus,
638 			(uint64)fBytesTransferred);
639 	}
640 
641 protected:
642 	struct vnode*	fVnode;
643 	dev_t			fMountID;
644 	ino_t			fNodeID;
645 	void*			fCookie;
646 	off_t			fPos;
647 	generic_io_vec*	fVecs;
648 	uint32			fCount;
649 	uint32			fFlags;
650 	generic_size_t	fBytesRequested;
651 	status_t		fStatus;
652 	generic_size_t	fBytesTransferred;
653 };
654 
655 
656 class ReadPages : public PagesIOTraceEntry {
657 public:
658 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
659 		const generic_io_vec* vecs, uint32 count, uint32 flags,
660 		generic_size_t bytesRequested, status_t status,
661 		generic_size_t bytesTransferred)
662 		:
663 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
664 			bytesRequested, status, bytesTransferred)
665 	{
666 		Initialized();
667 	}
668 
669 	virtual void AddDump(TraceOutput& out)
670 	{
671 		PagesIOTraceEntry::AddDump(out, "read");
672 	}
673 };
674 
675 
676 class WritePages : public PagesIOTraceEntry {
677 public:
678 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
679 		const generic_io_vec* vecs, uint32 count, uint32 flags,
680 		generic_size_t bytesRequested, status_t status,
681 		generic_size_t bytesTransferred)
682 		:
683 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
684 			bytesRequested, status, bytesTransferred)
685 	{
686 		Initialized();
687 	}
688 
689 	virtual void AddDump(TraceOutput& out)
690 	{
691 		PagesIOTraceEntry::AddDump(out, "write");
692 	}
693 };
694 
695 }	// namespace VFSPagesIOTracing
696 
697 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
698 #else
699 #	define TPIO(x) ;
700 #endif	// VFS_PAGES_IO_TRACING
701 
702 
703 /*! Finds the mounted device (the fs_mount structure) with the given ID.
704 	Note, you must hold the sMountLock lock when you call this function.
705 */
706 static struct fs_mount*
707 find_mount(dev_t id)
708 {
709 	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);
710 
711 	return sMountsTable->Lookup(id);
712 }
713 
714 
715 static status_t
716 get_mount(dev_t id, struct fs_mount** _mount)
717 {
718 	struct fs_mount* mount;
719 
720 	ReadLocker nodeLocker(sVnodeLock);
721 	ReadLocker mountLocker(sMountLock);
722 
723 	mount = find_mount(id);
724 	if (mount == NULL)
725 		return B_BAD_VALUE;
726 
727 	struct vnode* rootNode = mount->root_vnode;
728 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
729 		|| rootNode->ref_count == 0) {
730 		// might have been called during a mount/unmount operation
731 		return B_BUSY;
732 	}
733 
734 	inc_vnode_ref_count(rootNode);
735 	*_mount = mount;
736 	return B_OK;
737 }
738 
739 
740 static void
741 put_mount(struct fs_mount* mount)
742 {
743 	if (mount)
744 		put_vnode(mount->root_vnode);
745 }
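
// A minimal usage sketch (illustrative only): get_mount() pins the mount by
// referencing its root vnode; put_mount() drops that reference again:
#if 0
	struct fs_mount* mount;
	if (get_mount(id, &mount) != B_OK)
		return B_BAD_VALUE;
	// ... safely use mount; it cannot be unmounted in the meantime ...
	put_mount(mount);
#endif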
746 
747 
748 /*!	Tries to open the specified file system module.
749 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
750 	Returns a pointer to the file system module interface, or NULL if the
751 	module could not be opened.
752 */
753 static file_system_module_info*
754 get_file_system(const char* fsName)
755 {
756 	char name[B_FILE_NAME_LENGTH];
757 	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
758 		// construct module name if we didn't get one
759 		// (we currently support only one API)
760 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
761 		fsName = NULL;
762 	}
763 
764 	file_system_module_info* info;
765 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
766 		return NULL;
767 
768 	return info;
769 }
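
// For example (illustrative): get_file_system("bfs") opens the module
// "file_systems/bfs/v1", while a full name like "file_systems/bfs/v1" is
// passed to get_module() verbatim.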
770 
771 
772 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
773 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available.
777 */
778 static char*
779 get_file_system_name(const char* fsName)
780 {
781 	const size_t length = strlen("file_systems/");
782 
783 	if (strncmp(fsName, "file_systems/", length) != 0) {
784 		// the name already seems to be the module's file name
785 		return strdup(fsName);
786 	}
787 
788 	fsName += length;
789 	const char* end = strchr(fsName, '/');
790 	if (end == NULL) {
791 		// this doesn't seem to be a valid name, but well...
792 		return strdup(fsName);
793 	}
794 
795 	// cut off the trailing /v1
796 
797 	char* name = (char*)malloc(end + 1 - fsName);
798 	if (name == NULL)
799 		return NULL;
800 
801 	strlcpy(name, fsName, end + 1 - fsName);
802 	return name;
803 }
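
// For example (illustrative): both get_file_system_name("bfs") and
// get_file_system_name("file_systems/bfs/v1") yield a malloc()ed "bfs"
// that the caller must free().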
804 
805 
806 /*!	Accepts a list of file system names separated by colons, one for each
807 	layer, and returns the file system name for the specified layer.
808 	The name is allocated for you, and you have to free() it when you're
809 	done with it.
810 	Returns NULL if the required memory is not available or if there is no
811 	name for the specified layer.
812 */
813 static char*
814 get_file_system_name_for_layer(const char* fsNames, int32 layer)
815 {
816 	while (layer >= 0) {
817 		const char* end = strchr(fsNames, ':');
818 		if (end == NULL) {
819 			if (layer == 0)
820 				return strdup(fsNames);
821 			return NULL;
822 		}
823 
824 		if (layer == 0) {
825 			size_t length = end - fsNames + 1;
826 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
827 			strlcpy(result, fsNames, length);
828 			return result;
829 		}
830 
831 		fsNames = end + 1;
832 		layer--;
833 	}
834 
835 	return NULL;
836 }
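
// For example (with hypothetical layer names): given fsNames = "bfs:overlay",
// layer 0 yields "bfs", layer 1 yields "overlay", and layer 2 returns NULL.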
837 
838 
839 static void
840 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
841 {
842 	MutexLocker _(mount->lock);
843 	mount->vnodes.Add(vnode);
844 }
845 
846 
847 static void
848 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
849 {
850 	MutexLocker _(mount->lock);
851 	mount->vnodes.Remove(vnode);
852 }
853 
854 
855 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
856 
857 	The caller must hold the sVnodeLock (read lock at least).
858 
859 	\param mountID the mount ID.
860 	\param vnodeID the node ID.
861 
862 	\return The vnode structure, if it was found in the hash table, \c NULL
863 			otherwise.
864 */
865 static struct vnode*
866 lookup_vnode(dev_t mountID, ino_t vnodeID)
867 {
868 	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);
869 
870 	struct vnode_hash_key key;
871 
872 	key.device = mountID;
873 	key.vnode = vnodeID;
874 
875 	return sVnodeTable->Lookup(key);
876 }
877 
878 
879 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
880 
881 	This will also wait for BUSY_VNODE_DELAY before returning if one should
882 	still wait for the vnode to become unbusy.
883 
884 	\return \c true if one should retry, \c false if not.
885 */
886 static bool
887 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
888 {
889 	if (--tries < 0) {
890 		// vnode doesn't seem to become unbusy
891 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
892 			" is not becoming unbusy!\n", mountID, vnodeID);
893 		return false;
894 	}
895 	snooze(BUSY_VNODE_DELAY);
896 	return true;
897 }
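
// An illustrative sketch (not from the original source) of the retry loop
// this helper supports; get_vnode() below uses the same pattern, dropping
// its locks before each retry:
#if 0
	int32 tries = BUSY_VNODE_RETRIES;
	while (vnode != NULL && vnode->IsBusy()) {
		// ... unlock ...
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;
		// ... relock and look the vnode up again ...
	}
#endif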
898 
899 
900 /*!	Creates a new vnode with the given mount and node ID.
901 	If the node already exists, it is returned instead and no new node is
902 	created. In either case -- but not if an error occurs -- the function write
903 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
904 	error the lock is not held on return.
905 
906 	\param mountID The mount ID.
907 	\param vnodeID The vnode ID.
908 	\param _vnode Will be set to the new vnode on success.
909 	\param _nodeCreated Will be set to \c true when the returned vnode has
910 		been newly created, \c false when it already existed. Will not be
911 		changed on error.
912 	\return \c B_OK, when the vnode was successfully created and inserted or
913 		a node with the given ID was found, \c B_NO_MEMORY or
914 		\c B_ENTRY_NOT_FOUND on error.
915 */
916 static status_t
917 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
918 	bool& _nodeCreated)
919 {
920 	FUNCTION(("create_new_vnode_and_lock()\n"));
921 
922 	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
923 	if (vnode == NULL)
924 		return B_NO_MEMORY;
925 
926 	// initialize basic values
927 	memset(vnode, 0, sizeof(struct vnode));
928 	vnode->device = mountID;
929 	vnode->id = vnodeID;
930 	vnode->ref_count = 1;
931 	vnode->SetBusy(true);
932 
933 	// look up the node -- it might have been added by someone else in the
934 	// meantime
935 	rw_lock_write_lock(&sVnodeLock);
936 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
937 	if (existingVnode != NULL) {
938 		object_cache_free(sVnodeCache, vnode, 0);
939 		_vnode = existingVnode;
940 		_nodeCreated = false;
941 		return B_OK;
942 	}
943 
944 	// get the mount structure
945 	rw_lock_read_lock(&sMountLock);
946 	vnode->mount = find_mount(mountID);
947 	if (!vnode->mount || vnode->mount->unmounting) {
948 		rw_lock_read_unlock(&sMountLock);
949 		rw_lock_write_unlock(&sVnodeLock);
950 		object_cache_free(sVnodeCache, vnode, 0);
951 		return B_ENTRY_NOT_FOUND;
952 	}
953 
954 	// add the vnode to the mount's node list and the hash table
955 	sVnodeTable->Insert(vnode);
956 	add_vnode_to_mount_list(vnode, vnode->mount);
957 
958 	rw_lock_read_unlock(&sMountLock);
959 
960 	_vnode = vnode;
961 	_nodeCreated = true;
962 
963 	// keep the vnode lock locked
964 	return B_OK;
965 }
966 
967 
968 /*!	Frees the vnode and all resources it has acquired, and removes
969 	it from the vnode hash as well as from its mount structure.
970 	Will also make sure that any cache modifications are written back.
971 */
972 static void
973 free_vnode(struct vnode* vnode, bool reenter)
974 {
975 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
976 		vnode);
977 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
978 
979 	// write back any changes in this vnode's cache -- but only
980 	// if the vnode won't be deleted, in which case the changes
981 	// will be discarded
982 
983 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
984 		FS_CALL_NO_PARAMS(vnode, fsync);
985 
986 	// Note: If this vnode has a cache attached, there will still be two
987 	// references to that cache at this point. The last one belongs to the vnode
988 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
989 	// cache. Each but the last reference to a cache also includes a reference
990 	// to the vnode. The file cache, however, released its reference (cf.
991 	// file_cache_create()), so that this vnode's ref count has the chance to
992 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
993 	// cache reference to be released, which will also release a (no longer
994 	// existing) vnode reference. To avoid problems, we set the vnode's ref
995 	// count, so that it will neither become negative nor 0.
996 	vnode->ref_count = 2;
997 
998 	if (!vnode->IsUnpublished()) {
999 		if (vnode->IsRemoved())
1000 			FS_CALL(vnode, remove_vnode, reenter);
1001 		else
1002 			FS_CALL(vnode, put_vnode, reenter);
1003 	}
1004 
1005 	// If the vnode has a VMCache attached, make sure that it won't try to get
1006 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1007 	// long as the vnode is busy and in the hash, that won't happen, but as
1008 	// soon as we've removed it from the hash, it could reload the vnode -- with
1009 	// a new cache attached!
1010 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
1011 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1012 
1013 	// The file system has removed the resources of the vnode now, so we can
1014 	// make it available again (by removing the busy vnode from the hash).
1015 	rw_lock_write_lock(&sVnodeLock);
1016 	sVnodeTable->Remove(vnode);
1017 	rw_lock_write_unlock(&sVnodeLock);
1018 
1019 	// if we have a VMCache attached, remove it
1020 	if (vnode->cache)
1021 		vnode->cache->ReleaseRef();
1022 
1023 	vnode->cache = NULL;
1024 
1025 	remove_vnode_from_mount_list(vnode, vnode->mount);
1026 
1027 	object_cache_free(sVnodeCache, vnode, 0);
1028 }
1029 
1030 
1031 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1032 	if the counter dropped to 0.
1033 
1034 	The caller must, of course, own a reference to the vnode to call this
1035 	function.
1036 	The caller must not hold the sVnodeLock or the sMountLock.
1037 
1038 	\param vnode the vnode.
1039 	\param alwaysFree don't move this vnode into the unused list, but really
1040 		   delete it if possible.
1041 	\param reenter \c true, if this function is called (indirectly) from within
1042 		   a file system. This will be passed to file system hooks only.
1043 	\return \c B_OK, if everything went fine, an error code otherwise.
1044 */
1045 static status_t
1046 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1047 {
1048 	ReadLocker locker(sVnodeLock);
1049 	AutoLocker<Vnode> nodeLocker(vnode);
1050 
1051 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1052 
1053 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1054 
1055 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1056 		vnode->ref_count));
1057 
1058 	if (oldRefCount != 1)
1059 		return B_OK;
1060 
1061 	if (vnode->IsBusy())
1062 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1063 
1064 	bool freeNode = false;
1065 	bool freeUnusedNodes = false;
1066 
1067 	// Just insert the vnode into an unused list if we don't need
1068 	// to delete it
1069 	if (vnode->IsRemoved() || alwaysFree) {
1070 		vnode_to_be_freed(vnode);
1071 		vnode->SetBusy(true);
1072 		freeNode = true;
1073 	} else
1074 		freeUnusedNodes = vnode_unused(vnode);
1075 
1076 	nodeLocker.Unlock();
1077 	locker.Unlock();
1078 
1079 	if (freeNode)
1080 		free_vnode(vnode, reenter);
1081 	else if (freeUnusedNodes)
1082 		free_unused_vnodes();
1083 
1084 	return B_OK;
1085 }
1086 
1087 
1088 /*!	\brief Increments the reference counter of the given vnode.
1089 
1090 	The caller must make sure that the node isn't deleted while this function
1091 	is called. This can be done either:
1092 	- by ensuring that a reference to the node exists and remains in existence,
1093 	  or
1094 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1095 	  or by holding sVnodeLock write locked.
1096 
1097 	In the second case the caller is responsible for dealing with the ref count
1098 	0 -> 1 transition. That is, 1. this function must not be invoked when the
1099 	node is busy in the first place, and 2. vnode_used() must be called for the
1100 	node.
1101 
1102 	\param vnode the vnode.
1103 */
1104 static void
1105 inc_vnode_ref_count(struct vnode* vnode)
1106 {
1107 	atomic_add(&vnode->ref_count, 1);
1108 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1109 		vnode->ref_count));
1110 }
1111 
1112 
1113 static bool
1114 is_special_node_type(int type)
1115 {
1116 	// at the moment only FIFOs are supported
1117 	return S_ISFIFO(type);
1118 }
1119 
1120 
1121 static status_t
1122 create_special_sub_node(struct vnode* vnode, uint32 flags)
1123 {
1124 	if (S_ISFIFO(vnode->Type()))
1125 		return create_fifo_vnode(vnode->mount->volume, vnode);
1126 
1127 	return B_BAD_VALUE;
1128 }
1129 
1130 
1131 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1132 
1133 	If the node is not yet in memory, it will be loaded.
1134 
1135 	The caller must not hold the sVnodeLock or the sMountLock.
1136 
1137 	\param mountID the mount ID.
1138 	\param vnodeID the node ID.
1139 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1140 		   retrieved vnode structure shall be written.
1141 	\param reenter \c true, if this function is called (indirectly) from within
1142 		   a file system.
1143 	\return \c B_OK if everything went fine, an error code otherwise.
1144 */
1145 static status_t
1146 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1147 	int reenter)
1148 {
1149 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1150 		mountID, vnodeID, _vnode));
1151 
1152 	rw_lock_read_lock(&sVnodeLock);
1153 
1154 	int32 tries = BUSY_VNODE_RETRIES;
1155 restart:
1156 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1157 	AutoLocker<Vnode> nodeLocker(vnode);
1158 
1159 	if (vnode && vnode->IsBusy()) {
1160 		// vnodes in the Removed state (except ones still Unpublished)
1161 		// which are also Busy will disappear soon, so we do not wait for them.
1162 		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();
1163 
1164 		nodeLocker.Unlock();
1165 		rw_lock_read_unlock(&sVnodeLock);
1166 		if (!canWait) {
1167 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1168 				mountID, vnodeID);
1169 			return B_BUSY;
1170 		}
1171 		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
1172 			return B_BUSY;
1173 
1174 		rw_lock_read_lock(&sVnodeLock);
1175 		goto restart;
1176 	}
1177 
1178 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1179 
1180 	status_t status;
1181 
1182 	if (vnode) {
1183 		if (vnode->ref_count == 0) {
1184 			// this vnode has been unused before
1185 			vnode_used(vnode);
1186 		}
1187 		inc_vnode_ref_count(vnode);
1188 
1189 		nodeLocker.Unlock();
1190 		rw_lock_read_unlock(&sVnodeLock);
1191 	} else {
1192 		// we need to create a new vnode and read it in
1193 		rw_lock_read_unlock(&sVnodeLock);
1194 			// unlock -- create_new_vnode_and_lock() write-locks on success
1195 		bool nodeCreated;
1196 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1197 			nodeCreated);
1198 		if (status != B_OK)
1199 			return status;
1200 
1201 		if (!nodeCreated) {
1202 			rw_lock_read_lock(&sVnodeLock);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 			goto restart;
1205 		}
1206 
1207 		rw_lock_write_unlock(&sVnodeLock);
1208 
1209 		int type;
1210 		uint32 flags;
1211 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1212 			&flags, reenter);
1213 		if (status == B_OK && vnode->private_node == NULL)
1214 			status = B_BAD_VALUE;
1215 
1216 		bool gotNode = status == B_OK;
1217 		bool publishSpecialSubNode = false;
1218 		if (gotNode) {
1219 			vnode->SetType(type);
1220 			publishSpecialSubNode = is_special_node_type(type)
1221 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1222 		}
1223 
1224 		if (gotNode && publishSpecialSubNode)
1225 			status = create_special_sub_node(vnode, flags);
1226 
1227 		if (status != B_OK) {
1228 			if (gotNode)
1229 				FS_CALL(vnode, put_vnode, reenter);
1230 
1231 			rw_lock_write_lock(&sVnodeLock);
1232 			sVnodeTable->Remove(vnode);
1233 			remove_vnode_from_mount_list(vnode, vnode->mount);
1234 			rw_lock_write_unlock(&sVnodeLock);
1235 
1236 			object_cache_free(sVnodeCache, vnode, 0);
1237 			return status;
1238 		}
1239 
1240 		rw_lock_read_lock(&sVnodeLock);
1241 		vnode->Lock();
1242 
1243 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1244 		vnode->SetBusy(false);
1245 
1246 		vnode->Unlock();
1247 		rw_lock_read_unlock(&sVnodeLock);
1248 	}
1249 
1250 	TRACE(("get_vnode: returning %p\n", vnode));
1251 
1252 	*_vnode = vnode;
1253 	return B_OK;
1254 }
1255 
1256 
1257 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1258 	if the counter dropped to 0.
1259 
1260 	The caller must, of course, own a reference to the vnode to call this
1261 	function.
1262 	The caller must not hold the sVnodeLock or the sMountLock.
1263 
1264 	\param vnode the vnode.
1265 */
1266 static inline void
1267 put_vnode(struct vnode* vnode)
1268 {
1269 	dec_vnode_ref_count(vnode, false, false);
1270 }
1271 
1272 
1273 static void
1274 free_unused_vnodes(int32 level)
1275 {
1276 	unused_vnodes_check_started();
1277 
1278 	if (level == B_NO_LOW_RESOURCE) {
1279 		unused_vnodes_check_done();
1280 		return;
1281 	}
1282 
1283 	flush_hot_vnodes();
1284 
1285 	// determine how many nodes to free
1286 	uint32 count = 1;
1287 	{
1288 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1289 
1290 		switch (level) {
1291 			case B_LOW_RESOURCE_NOTE:
1292 				count = sUnusedVnodes / 100;
1293 				break;
1294 			case B_LOW_RESOURCE_WARNING:
1295 				count = sUnusedVnodes / 10;
1296 				break;
1297 			case B_LOW_RESOURCE_CRITICAL:
1298 				count = sUnusedVnodes;
1299 				break;
1300 		}
1301 
1302 		if (count > sUnusedVnodes)
1303 			count = sUnusedVnodes;
1304 	}
1305 
1306 	// Write back the modified pages of some unused vnodes and free them.
1307 
1308 	for (uint32 i = 0; i < count; i++) {
1309 		ReadLocker vnodesReadLocker(sVnodeLock);
1310 
1311 		// get the first node
1312 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1313 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1314 			&sUnusedVnodeList);
1315 		unusedVnodesLocker.Unlock();
1316 
1317 		if (vnode == NULL)
1318 			break;
1319 
1320 		// lock the node
1321 		AutoLocker<Vnode> nodeLocker(vnode);
1322 
1323 		// Check whether the node is still unused -- since we only append to the
1324 		// tail of the unused queue, the vnode should still be at its head.
1325 		// Alternatively we could check its ref count for 0 and its busy flag,
1326 		// but if the node is no longer at the head of the queue, it means it
1327 		// has been touched in the meantime, i.e. it is no longer the least
1328 	// recently used unused vnode and we'd rather not free it.
1329 		unusedVnodesLocker.Lock();
1330 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1331 			continue;
1332 		unusedVnodesLocker.Unlock();
1333 
1334 		ASSERT(!vnode->IsBusy());
1335 
1336 		// grab a reference
1337 		inc_vnode_ref_count(vnode);
1338 		vnode_used(vnode);
1339 
1340 		// write back changes and free the node
1341 		nodeLocker.Unlock();
1342 		vnodesReadLocker.Unlock();
1343 
1344 		if (vnode->cache != NULL)
1345 			vnode->cache->WriteModified();
1346 
1347 		dec_vnode_ref_count(vnode, true, false);
1348 			// this should free the vnode when it's still unused
1349 	}
1350 
1351 	unused_vnodes_check_done();
1352 }
1353 
1354 
1355 /*!	Gets the vnode the given vnode is covering.
1356 
1357 	The caller must have \c sVnodeLock read-locked at least.
1358 
1359 	The function returns a reference to the retrieved vnode (if any), which the
1360 	caller is responsible for releasing.
1361 
1362 	\param vnode The vnode whose covered node shall be returned.
1363 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1364 		vnode.
1365 */
1366 static inline Vnode*
1367 get_covered_vnode_locked(Vnode* vnode)
1368 {
1369 	if (Vnode* coveredNode = vnode->covers) {
1370 		while (coveredNode->covers != NULL)
1371 			coveredNode = coveredNode->covers;
1372 
1373 		inc_vnode_ref_count(coveredNode);
1374 		return coveredNode;
1375 	}
1376 
1377 	return NULL;
1378 }
1379 
1380 
1381 /*!	Gets the vnode the given vnode is covering.
1382 
1383 	The caller must not hold \c sVnodeLock. Note that this implies a race
1384 	condition, since the situation can change at any time.
1385 
1386 	The function returns a reference to the retrieved vnode (if any), which the
1387 	caller is responsible for releasing.
1388 
1389 	\param vnode The vnode whose covered node shall be returned.
1390 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1391 		vnode.
1392 */
1393 static inline Vnode*
1394 get_covered_vnode(Vnode* vnode)
1395 {
1396 	if (!vnode->IsCovering())
1397 		return NULL;
1398 
1399 	ReadLocker vnodeReadLocker(sVnodeLock);
1400 	return get_covered_vnode_locked(vnode);
1401 }
1402 
1403 
1404 /*!	Gets the vnode the given vnode is covered by.
1405 
1406 	The caller must have \c sVnodeLock read-locked at least.
1407 
1408 	The function returns a reference to the retrieved vnode (if any), which the
1409 	caller is responsible for releasing.
1410 
1411 	\param vnode The vnode whose covering node shall be returned.
1412 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1413 		any vnode.
1414 */
1415 static Vnode*
1416 get_covering_vnode_locked(Vnode* vnode)
1417 {
1418 	if (Vnode* coveringNode = vnode->covered_by) {
1419 		while (coveringNode->covered_by != NULL)
1420 			coveringNode = coveringNode->covered_by;
1421 
1422 		inc_vnode_ref_count(coveringNode);
1423 		return coveringNode;
1424 	}
1425 
1426 	return NULL;
1427 }
1428 
1429 
1430 /*!	Gets the vnode the given vnode is covered by.
1431 
1432 	The caller must not hold \c sVnodeLock. Note that this implies a race
1433 	condition, since the situation can change at any time.
1434 
1435 	The function returns a reference to the retrieved vnode (if any), which the
1436 	caller is responsible for releasing.
1437 
1438 	\param vnode The vnode whose covering node shall be returned.
1439 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1440 		any vnode.
1441 */
1442 static inline Vnode*
1443 get_covering_vnode(Vnode* vnode)
1444 {
1445 	if (!vnode->IsCovered())
1446 		return NULL;
1447 
1448 	ReadLocker vnodeReadLocker(sVnodeLock);
1449 	return get_covering_vnode_locked(vnode);
1450 }
1451 
1452 
1453 static void
1454 free_unused_vnodes()
1455 {
1456 	free_unused_vnodes(
1457 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1458 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1459 }
1460 
1461 
1462 static void
1463 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1464 {
1465 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1466 
1467 	free_unused_vnodes(level);
1468 }
1469 
1470 
1471 static inline void
1472 put_advisory_locking(struct advisory_locking* locking)
1473 {
1474 	release_sem(locking->lock);
1475 }
1476 
1477 
1478 /*!	Returns the advisory_locking object of the \a vnode in case it
1479 	has one, and locks it.
1480 	You have to call put_advisory_locking() when you're done with
1481 	it.
1482 	Note, you must not have the vnode mutex locked when calling
1483 	this function.
1484 */
1485 static struct advisory_locking*
1486 get_advisory_locking(struct vnode* vnode)
1487 {
1488 	rw_lock_read_lock(&sVnodeLock);
1489 	vnode->Lock();
1490 
1491 	struct advisory_locking* locking = vnode->advisory_locking;
1492 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1493 
1494 	vnode->Unlock();
1495 	rw_lock_read_unlock(&sVnodeLock);
1496 
1497 	if (lock >= 0)
1498 		lock = acquire_sem(lock);
1499 	if (lock < 0) {
1500 		// This means the locking has been deleted in the meantime
1501 		// or had never existed in the first place - otherwise, we
1502 		// would get the lock at some point.
1503 		return NULL;
1504 	}
1505 
1506 	return locking;
1507 }
1508 
1509 
1510 /*!	Creates a locked advisory_locking object, and attaches it to the
1511 	given \a vnode.
1512 	Returns B_OK in case of success - even if the vnode got such an
1513 	object from someone else in the meantime, you'll still get this
1514 	one locked then.
1515 */
1516 static status_t
1517 create_advisory_locking(struct vnode* vnode)
1518 {
1519 	if (vnode == NULL)
1520 		return B_FILE_ERROR;
1521 
1522 	ObjectDeleter<advisory_locking> lockingDeleter;
1523 	struct advisory_locking* locking = NULL;
1524 
1525 	while (get_advisory_locking(vnode) == NULL) {
1526 		// no locking object set on the vnode yet, create one
1527 		if (locking == NULL) {
1528 			locking = new(std::nothrow) advisory_locking;
1529 			if (locking == NULL)
1530 				return B_NO_MEMORY;
1531 			lockingDeleter.SetTo(locking);
1532 
1533 			locking->wait_sem = create_sem(0, "advisory lock");
1534 			if (locking->wait_sem < 0)
1535 				return locking->wait_sem;
1536 
1537 			locking->lock = create_sem(0, "advisory locking");
1538 			if (locking->lock < 0)
1539 				return locking->lock;
1540 		}
1541 
1542 		// set our newly created locking object
1543 		ReadLocker _(sVnodeLock);
1544 		AutoLocker<Vnode> nodeLocker(vnode);
1545 		if (vnode->advisory_locking == NULL) {
1546 			vnode->advisory_locking = locking;
1547 			lockingDeleter.Detach();
1548 			return B_OK;
1549 		}
1550 	}
1551 
1552 	// The vnode already had a locking object. That's just as well.
1553 
1554 	return B_OK;
1555 }
1556 
1557 
1558 /*!	Returns \c true when either \a flock is \c NULL or \a flock intersects
1559 	with the advisory_lock \a lock.
1560 */
1561 static bool
1562 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1563 {
1564 	if (flock == NULL)
1565 		return true;
1566 
1567 	return lock->start <= flock->l_start - 1 + flock->l_len
1568 		&& lock->end >= flock->l_start;
1569 }
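
// For example (illustrative): a lock spanning [10, 19] intersects a flock
// with l_start = 15 and l_len = 10 (i.e. [15, 24]): 10 <= 24 and 19 >= 15.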
1570 
1571 
1572 /*!	Tests whether acquiring a lock would block.
1573 */
1574 static status_t
1575 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1576 {
1577 	flock->l_type = F_UNLCK;
1578 
1579 	struct advisory_locking* locking = get_advisory_locking(vnode);
1580 	if (locking == NULL)
1581 		return B_OK;
1582 
1583 	team_id team = team_get_current_team_id();
1584 
1585 	LockList::Iterator iterator = locking->locks.GetIterator();
1586 	while (iterator.HasNext()) {
1587 		struct advisory_lock* lock = iterator.Next();
1588 
1589 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1590 			// locks do overlap
1591 			if (flock->l_type != F_RDLCK || !lock->shared) {
1592 				// collision
1593 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1594 				flock->l_whence = SEEK_SET;
1595 				flock->l_start = lock->start;
1596 				flock->l_len = lock->end - lock->start + 1;
1597 				flock->l_pid = lock->team;
1598 				break;
1599 			}
1600 		}
1601 	}
1602 
1603 	put_advisory_locking(locking);
1604 	return B_OK;
1605 }
1606 
1607 
1608 /*!	Removes the specified lock, or all locks of the calling team
1609 	if \a flock is NULL.
1610 */
1611 static status_t
1612 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1613 	struct file_descriptor* descriptor, struct flock* flock)
1614 {
1615 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1616 
1617 	struct advisory_locking* locking = get_advisory_locking(vnode);
1618 	if (locking == NULL)
1619 		return B_OK;
1620 
1621 	// find matching lock entries
1622 
1623 	LockList::Iterator iterator = locking->locks.GetIterator();
1624 	while (iterator.HasNext()) {
1625 		struct advisory_lock* lock = iterator.Next();
1626 		bool removeLock = false;
1627 
1628 		if (descriptor != NULL && lock->bound_to == descriptor) {
1629 			// Remove flock() locks
1630 			removeLock = true;
1631 		} else if (lock->bound_to == context
1632 				&& advisory_lock_intersects(lock, flock)) {
1633 			// Remove POSIX locks
1634 			bool endsBeyond = false;
1635 			bool startsBefore = false;
1636 			if (flock != NULL) {
1637 				startsBefore = lock->start < flock->l_start;
1638 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1639 			}
1640 
1641 			if (!startsBefore && !endsBeyond) {
1642 				// lock is completely contained in flock
1643 				removeLock = true;
1644 			} else if (startsBefore && !endsBeyond) {
1645 				// cut the end of the lock
1646 				lock->end = flock->l_start - 1;
1647 			} else if (!startsBefore && endsBeyond) {
1648 				// cut the start of the lock
1649 				lock->start = flock->l_start + flock->l_len;
1650 			} else {
1651 				// divide the lock into two locks
1652 				struct advisory_lock* secondLock = new(std::nothrow) advisory_lock;
1653 				if (secondLock == NULL) {
1654 					// TODO: we should probably revert the locks we already
1655 					// changed... (ie. allocate upfront)
1656 					put_advisory_locking(locking);
1657 					return B_NO_MEMORY;
1658 				}
1659 
1660 				secondLock->bound_to = context;
1661 				secondLock->team = lock->team;
1662 				secondLock->session = lock->session;
1663 				// values must already be normalized when getting here; the
1664 				// second lock keeps the original end of the range, and only
1665 				// afterwards is the first lock cut short
1666 				secondLock->start = flock->l_start + flock->l_len;
1667 				secondLock->end = lock->end;
1668 				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;
1669 
1670 				locking->locks.Add(secondLock);
1671 			}
1672 		}
1673 
1674 		if (removeLock) {
1675 			// this lock is no longer used
1676 			iterator.Remove();
1677 			delete lock;
1678 		}
1679 	}
1680 
1681 	bool removeLocking = locking->locks.IsEmpty();
1682 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1683 
1684 	put_advisory_locking(locking);
1685 
1686 	if (removeLocking) {
1687 		// We can remove the whole advisory locking structure; it's no
1688 		// longer used
1689 		locking = get_advisory_locking(vnode);
1690 		if (locking != NULL) {
1691 			ReadLocker locker(sVnodeLock);
1692 			AutoLocker<Vnode> nodeLocker(vnode);
1693 
1694 			// the locking could have been changed in the meantime
1695 			if (locking->locks.IsEmpty()) {
1696 				vnode->advisory_locking = NULL;
1697 				nodeLocker.Unlock();
1698 				locker.Unlock();
1699 
1700 				// we've detached the locking from the vnode, so we can
1701 				// safely delete it
1702 				delete locking;
1703 			} else {
1704 				// the locking is in use again
1705 				nodeLocker.Unlock();
1706 				locker.Unlock();
1707 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1708 			}
1709 		}
1710 	}
1711 
1712 	return B_OK;
1713 }
1714 
1715 
1716 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1717 	will wait for the lock to become available if there are any collisions
1718 	(if \a wait is \c false, it will return B_PERMISSION_DENIED in this case).
1719 
1720 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1721 	BSD flock() semantics are used, that is, all children can unlock the file
1722 	in question (we even allow parents to remove the lock, but that
1723 	seems to be in line with what the BSDs are doing).
1724 */
1725 static status_t
1726 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1727 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1728 {
1729 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1730 		vnode, flock, wait ? "yes" : "no"));
1731 
1732 	bool shared = flock->l_type == F_RDLCK;
1733 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1734 	status_t status = B_OK;
1735 
1736 	// TODO: do deadlock detection!
1737 
1738 	struct advisory_locking* locking;
1739 
1740 	while (true) {
1741 		// if this vnode has an advisory_locking structure attached,
1742 		// lock that one and search for any colliding file lock
1743 		status = create_advisory_locking(vnode);
1744 		if (status != B_OK)
1745 			return status;
1746 
1747 		locking = vnode->advisory_locking;
1748 		team_id team = team_get_current_team_id();
1749 		sem_id waitForLock = -1;
1750 
1751 		// test for collisions
1752 		LockList::Iterator iterator = locking->locks.GetIterator();
1753 		while (iterator.HasNext()) {
1754 			struct advisory_lock* lock = iterator.Next();
1755 
1756 			// TODO: locks from the same team might be joinable!
1757 			if ((lock->team != team || lock->bound_to != boundTo)
1758 					&& advisory_lock_intersects(lock, flock)) {
1759 				// locks do overlap
1760 				if (!shared || !lock->shared) {
1761 					// we need to wait
1762 					waitForLock = locking->wait_sem;
1763 					break;
1764 				}
1765 			}
1766 		}
1767 
1768 		if (waitForLock < 0)
1769 			break;
1770 
1771 		// We need to wait. Do that, or fail now if we've been asked not to.
1772 
1773 		if (!wait) {
1774 			put_advisory_locking(locking);
1775 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1776 		}
1777 
1778 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1779 			B_CAN_INTERRUPT, 0);
1780 		if (status != B_OK && status != B_BAD_SEM_ID)
1781 			return status;
1782 
1783 		// We have been notified, but we need to re-lock the locking object. So
1784 		// go another round...
1785 	}
1786 
1787 	// install new lock
1788 
1789 	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
1790 	if (lock == NULL) {
1791 		put_advisory_locking(locking);
1792 		return B_NO_MEMORY;
1793 	}
1794 
1795 	lock->bound_to = boundTo;
1796 	lock->team = team_get_current_team_id();
1797 	lock->session = thread_get_current_thread()->team->session_id;
1798 	// values must already be normalized when getting here
1799 	lock->start = flock->l_start;
1800 	lock->end = flock->l_start - 1 + flock->l_len;
1801 	lock->shared = shared;
1802 
1803 	locking->locks.Add(lock);
1804 	put_advisory_locking(locking);
1805 
1806 	return status;
1807 }
1808 
1809 
1810 /*!	Normalizes the \a flock structure to make it easier to compare the
1811 	structure with others. The l_start and l_len fields are set to absolute
1812 	values according to the l_whence field.
1813 */
1814 static status_t
1815 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1816 {
1817 	switch (flock->l_whence) {
1818 		case SEEK_SET:
1819 			break;
1820 		case SEEK_CUR:
1821 			flock->l_start += descriptor->pos;
1822 			break;
1823 		case SEEK_END:
1824 		{
1825 			struct vnode* vnode = descriptor->u.vnode;
1826 			struct stat stat;
1827 			status_t status;
1828 
1829 			if (!HAS_FS_CALL(vnode, read_stat))
1830 				return B_UNSUPPORTED;
1831 
1832 			status = FS_CALL(vnode, read_stat, &stat);
1833 			if (status != B_OK)
1834 				return status;
1835 
1836 			flock->l_start += stat.st_size;
1837 			break;
1838 		}
1839 		default:
1840 			return B_BAD_VALUE;
1841 	}
1842 
1843 	if (flock->l_start < 0)
1844 		flock->l_start = 0;
1845 	if (flock->l_len == 0)
1846 		flock->l_len = OFF_MAX;
1847 
1848 	// don't let the offset and length overflow
1849 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1850 		flock->l_len = OFF_MAX - flock->l_start;
1851 
1852 	if (flock->l_len < 0) {
1853 		// a negative length reverses the region
1854 		flock->l_start += flock->l_len;
1855 		flock->l_len = -flock->l_len;
1856 	}
1857 
1858 	return B_OK;
1859 }
1860 
1861 
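/*!	Helper for disconnect_mount_or_vnode_fds(): While \a vnode belongs to
	\a mount (and, if given, equals \a vnodeToDisconnect), it is replaced
	by the vnode it covers, or by \a fallBack if it doesn't cover another
	vnode. If a replacement took place, a reference to the new node is
	acquired for the caller, and the reference to the given node is
	released.
*/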
1862 static void
1863 replace_vnode_if_disconnected(struct fs_mount* mount,
1864 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1865 	struct vnode* fallBack, bool lockRootLock)
1866 {
1867 	struct vnode* givenVnode = vnode;
1868 	bool vnodeReplaced = false;
1869 
1870 	ReadLocker vnodeReadLocker(sVnodeLock);
1871 
1872 	if (lockRootLock)
1873 		mutex_lock(&sIOContextRootLock);
1874 
1875 	while (vnode != NULL && vnode->mount == mount
1876 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1877 		if (vnode->covers != NULL) {
1878 			// redirect the vnode to the covered vnode
1879 			vnode = vnode->covers;
1880 		} else
1881 			vnode = fallBack;
1882 
1883 		vnodeReplaced = true;
1884 	}
1885 
1886 	// If we've replaced the node, grab a reference for the new one.
1887 	if (vnodeReplaced && vnode != NULL)
1888 		inc_vnode_ref_count(vnode);
1889 
1890 	if (lockRootLock)
1891 		mutex_unlock(&sIOContextRootLock);
1892 
1893 	vnodeReadLocker.Unlock();
1894 
1895 	if (vnodeReplaced)
1896 		put_vnode(givenVnode);
1897 }
1898 
1899 
1900 /*!	Disconnects all file descriptors that are associated with the
1901 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1902 	\a mount object.
1903 
	Note that after this function has been called, there might still be
	ongoing accesses - those already in progress won't be interrupted.
	Any subsequent access, however, will fail.
1907 
1908 	This is not a cheap function and should be used with care and rarely.
1909 	TODO: there is currently no means to stop a blocking read/write!
1910 */
1911 static void
1912 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1913 	struct vnode* vnodeToDisconnect)
1914 {
1915 	// iterate over all teams and peek into their file descriptors
1916 	TeamListIterator teamIterator;
1917 	while (Team* team = teamIterator.Next()) {
1918 		BReference<Team> teamReference(team, true);
1919 		TeamLocker teamLocker(team);
1920 
1921 		// lock the I/O context
1922 		io_context* context = team->io_context;
1923 		if (context == NULL)
1924 			continue;
1925 		MutexLocker contextLocker(context->io_mutex);
1926 
1927 		teamLocker.Unlock();
1928 
1929 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1930 			sRoot, true);
1931 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1932 			sRoot, false);
1933 
1934 		for (uint32 i = 0; i < context->table_size; i++) {
1935 			struct file_descriptor* descriptor = context->fds[i];
			if (descriptor == NULL
				|| (descriptor->open_mode & O_DISCONNECTED) != 0)
1937 				continue;
1938 
1939 			inc_fd_ref_count(descriptor);
1940 
1941 			// if this descriptor points at this mount, we
1942 			// need to disconnect it to be able to unmount
1943 			struct vnode* vnode = fd_vnode(descriptor);
1944 			if (vnodeToDisconnect != NULL) {
1945 				if (vnode == vnodeToDisconnect)
1946 					disconnect_fd(descriptor);
1947 			} else if ((vnode != NULL && vnode->mount == mount)
1948 				|| (vnode == NULL && descriptor->u.mount == mount))
1949 				disconnect_fd(descriptor);
1950 
1951 			put_fd(descriptor);
1952 		}
1953 	}
1954 }
1955 
1956 
1957 /*!	\brief Gets the root node of the current IO context.
1958 	If \a kernel is \c true, the kernel IO context will be used.
1959 	The caller obtains a reference to the returned node.
1960 */
1961 struct vnode*
1962 get_root_vnode(bool kernel)
1963 {
1964 	if (!kernel) {
		// Get the root vnode from the IO context
1966 		struct io_context* context = get_current_io_context(kernel);
1967 
1968 		mutex_lock(&sIOContextRootLock);
1969 
1970 		struct vnode* root = context->root;
1971 		if (root != NULL)
1972 			inc_vnode_ref_count(root);
1973 
1974 		mutex_unlock(&sIOContextRootLock);
1975 
1976 		if (root != NULL)
1977 			return root;
1978 
1979 		// That should never happen.
1980 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1981 			"have a root\n", team_get_current_team_id());
1982 	}
1983 
1984 	inc_vnode_ref_count(sRoot);
1985 	return sRoot;
1986 }
1987 
1988 
1989 /*!	\brief Gets the directory path and leaf name for a given path.
1990 
1991 	The supplied \a path is transformed to refer to the directory part of
1992 	the entry identified by the original path, and into the buffer \a filename
1993 	the leaf name of the original entry is written.
1994 	Neither the returned path nor the leaf name can be expected to be
1995 	canonical.
1996 
1997 	\param path The path to be analyzed. Must be able to store at least one
1998 		   additional character.
1999 	\param filename The buffer into which the leaf name will be written.
2000 		   Must be of size B_FILE_NAME_LENGTH at least.
2001 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2002 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2003 		   if the given path name is empty.
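
	For example (hypothetical input): "/a/b/c" is transformed into "/a/b/."
	with "c" written to \a filename; "foo" becomes "." with leaf "foo"; and
	"/a/b//" becomes "/a/." with leaf "b".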
2004 */
2005 static status_t
2006 get_dir_path_and_leaf(char* path, char* filename)
2007 {
2008 	if (*path == '\0')
2009 		return B_ENTRY_NOT_FOUND;
2010 
2011 	char* last = strrchr(path, '/');
		// '/' is not allowed in file names!
2013 
2014 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2015 
2016 	if (last == NULL) {
		// the path is a single segment with no '/' in it, e.g. "foo"
2019 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2020 			return B_NAME_TOO_LONG;
2021 
2022 		strcpy(path, ".");
2023 	} else {
2024 		last++;
2025 		if (last[0] == '\0') {
2026 			// special case: the path ends in one or more '/' - remove them
2027 			while (*--last == '/' && last != path);
2028 			last[1] = '\0';
2029 
2030 			if (last == path && last[0] == '/') {
2031 				// This path points to the root of the file system
2032 				strcpy(filename, ".");
2033 				return B_OK;
2034 			}
2035 			for (; last != path && *(last - 1) != '/'; last--);
2036 				// rewind to the start of the leaf before the '/'
2037 		}
2038 
2039 		// normal leaf: replace the leaf portion of the path with a '.'
2040 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2041 			return B_NAME_TOO_LONG;
2042 
2043 		last[0] = '.';
2044 		last[1] = '\0';
2045 	}
2046 	return B_OK;
2047 }
2048 
2049 
2050 static status_t
2051 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2052 	bool traverse, bool kernel, VnodePutter& _vnode)
2053 {
2054 	char clonedName[B_FILE_NAME_LENGTH + 1];
2055 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2056 		return B_NAME_TOO_LONG;
2057 
2058 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2059 	struct vnode* directory;
2060 
2061 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2062 	if (status < 0)
2063 		return status;
2064 
2065 	return vnode_path_to_vnode(directory, clonedName, traverse, kernel,
2066 		_vnode, NULL);
2067 }
2068 
2069 
2070 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2071 	and returns the respective vnode.
2072 	On success a reference to the vnode is acquired for the caller.
2073 */
2074 static status_t
2075 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2076 {
2077 	ino_t id;
2078 	bool missing;
2079 
2080 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2081 		return missing ? B_ENTRY_NOT_FOUND
2082 			: get_vnode(dir->device, id, _vnode, true, false);
2083 	}
2084 
2085 	status_t status = FS_CALL(dir, lookup, name, &id);
2086 	if (status != B_OK)
2087 		return status;
2088 
2089 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2090 	// have a reference and just need to look the node up.
2091 	rw_lock_read_lock(&sVnodeLock);
2092 	*_vnode = lookup_vnode(dir->device, id);
2093 	rw_lock_read_unlock(&sVnodeLock);
2094 
2095 	if (*_vnode == NULL) {
2096 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2097 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2098 		return B_ENTRY_NOT_FOUND;
2099 	}
2100 
2101 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2102 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2103 //		(*_vnode)->mount->id, (*_vnode)->id);
2104 
2105 	return B_OK;
2106 }
2107 
2108 
/*!	Returns the vnode for the relative \a path starting at the specified
	\a start vnode.

	\param[in,out] path The relative path being searched. Must not be NULL.
		If the function returns successfully, \a path contains the name of
		the last path component. This function clobbers the buffer pointed
		to by \a path only if it contains more than one component.

	Note that this function always decrements the ref count of the starting
	\a start vnode, whether it succeeds or fails.

	\param[out] _vnode If the function returns \c B_OK: set to the found
		node, with a reference acquired for the caller. If it returns an
		error and \a leafName is not NULL: set to the last existing
		directory in the path; the caller is responsible for releasing it
		via put_vnode(). If it returns an error and \a leafName is NULL:
		not used.
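
	Sketch of the walk (illustrative, not a contract): for the path "a/b/c"
	the loop looks up "a" in \a start, then "b" in the result, then "c" in
	that; symlinks found on the way are resolved by a bounded
	(B_MAX_SYMLINKS) recursive call, ".." at the IO context's root is
	silently kept at the root, and covered/covering mount point vnodes are
	traversed transparently.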
2127 */
2128 static status_t
2129 vnode_path_to_vnode(struct vnode* start, char* path, bool traverseLeafLink,
2130 	int count, struct io_context* ioContext, VnodePutter& _vnode,
2131 	ino_t* _parentID, char* leafName)
2132 {
	FUNCTION(("vnode_path_to_vnode(start = %p, path = %s)\n", start, path));
2134 	ASSERT(!_vnode.IsSet());
2135 
2136 	VnodePutter vnode(start);
2137 
2138 	if (path == NULL)
2139 		return B_BAD_VALUE;
2140 	if (*path == '\0')
2141 		return B_ENTRY_NOT_FOUND;
2142 
2143 	status_t status = B_OK;
2144 	ino_t lastParentID = vnode->id;
2145 	while (true) {
2146 		char* nextPath;
2147 
		TRACE(("vnode_path_to_vnode: top of loop. path = %p '%s'\n", path,
			path));
2150 
2151 		// done?
2152 		if (path[0] == '\0')
2153 			break;
2154 
2155 		// walk to find the next path component ("path" will point to a single
2156 		// path component), and filter out multiple slashes
2157 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2158 				nextPath++);
2159 
2160 		bool directoryFound = false;
2161 		if (*nextPath == '/') {
2162 			directoryFound = true;
2163 			*nextPath = '\0';
2164 			do
2165 				nextPath++;
2166 			while (*nextPath == '/');
2167 		}
2168 
		// If the next component is "..", and the current vnode is covering
		// another one, move to the covered vnode, so we pass the ".." to
		// the underlying file system.
		// Also prevent escaping the root of the IO context.
2172 		if (strcmp("..", path) == 0) {
2173 			if (vnode.Get() == ioContext->root) {
2174 				// Attempted prison break! Keep it contained.
2175 				path = nextPath;
2176 				continue;
2177 			}
2178 
2179 			if (Vnode* coveredVnode = get_covered_vnode(vnode.Get()))
2180 				vnode.SetTo(coveredVnode);
2181 		}
2182 
2183 		// check if vnode is really a directory
2184 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2185 			status = B_NOT_A_DIRECTORY;
2186 
2187 		// Check if we have the right to search the current directory vnode.
2188 		// If a file system doesn't have the access() function, we assume that
2189 		// searching a directory is always allowed
2190 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2191 			status = FS_CALL(vnode.Get(), access, X_OK);
2192 
2193 		// Tell the filesystem to get the vnode of this path component (if we
2194 		// got the permission from the call above)
2195 		VnodePutter nextVnode;
2196 		if (status == B_OK) {
2197 			struct vnode* temp = NULL;
2198 			status = lookup_dir_entry(vnode.Get(), path, &temp);
2199 			nextVnode.SetTo(temp);
2200 		}
2201 
2202 		if (status != B_OK) {
2203 			if (leafName != NULL) {
2204 				strlcpy(leafName, path, B_FILE_NAME_LENGTH);
2205 				_vnode.SetTo(vnode.Detach());
2206 			}
2207 			return status;
2208 		}
2209 
2210 		// If the new node is a symbolic link, resolve it (if we've been told
2211 		// to do it)
2212 		if (S_ISLNK(nextVnode->Type())
2213 			&& (traverseLeafLink || directoryFound)) {
2214 			size_t bufferSize;
2215 			char* buffer;
2216 
2217 			TRACE(("traverse link\n"));
2218 
2219 			if (count + 1 > B_MAX_SYMLINKS)
2220 				return B_LINK_LIMIT;
2221 
2222 			bufferSize = B_PATH_NAME_LENGTH;
2223 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2224 			if (buffer == NULL)
2225 				return B_NO_MEMORY;
2226 
2227 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2228 				bufferSize--;
				status = FS_CALL(nextVnode.Get(), read_symlink, buffer,
					&bufferSize);
2230 				// null-terminate
2231 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2232 					buffer[bufferSize] = '\0';
2233 			} else
2234 				status = B_BAD_VALUE;
2235 
2236 			if (status != B_OK) {
				object_cache_free(sPathNameCache, buffer, 0);
2238 				return status;
2239 			}
2240 			nextVnode.Unset();
2241 
2242 			// Check if we start from the root directory or the current
2243 			// directory ("vnode" still points to that one).
2244 			// Cut off all leading slashes if it's the root directory
2245 			path = buffer;
2246 			bool absoluteSymlink = false;
2247 			if (path[0] == '/') {
2248 				// we don't need the old directory anymore
2249 				vnode.Unset();
2250 
2251 				while (*++path == '/')
2252 					;
2253 
2254 				mutex_lock(&sIOContextRootLock);
2255 				vnode.SetTo(ioContext->root);
2256 				inc_vnode_ref_count(vnode.Get());
2257 				mutex_unlock(&sIOContextRootLock);
2258 
2259 				absoluteSymlink = true;
2260 			}
2261 
2262 			inc_vnode_ref_count(vnode.Get());
2263 				// balance the next recursion - we will decrement the
2264 				// ref_count of the vnode, no matter if we succeeded or not
2265 
2266 			if (absoluteSymlink && *path == '\0') {
2267 				// symlink was just "/"
2268 				nextVnode.SetTo(vnode.Get());
2269 			} else {
2270 				status = vnode_path_to_vnode(vnode.Get(), path, true, count + 1,
2271 					ioContext, nextVnode, &lastParentID, leafName);
2272 			}
2273 
2274 			object_cache_free(sPathNameCache, buffer, 0);
2275 
2276 			if (status != B_OK) {
2277 				if (leafName != NULL)
2278 					_vnode.SetTo(nextVnode.Detach());
2279 				return status;
2280 			}
2281 		} else
2282 			lastParentID = vnode->id;
2283 
2284 		// decrease the ref count on the old dir we just looked up into
2285 		vnode.Unset();
2286 
2287 		path = nextPath;
2288 		vnode.SetTo(nextVnode.Detach());
2289 
2290 		// see if we hit a covered node
2291 		if (Vnode* coveringNode = get_covering_vnode(vnode.Get()))
2292 			vnode.SetTo(coveringNode);
2293 	}
2294 
2295 	_vnode.SetTo(vnode.Detach());
2296 	if (_parentID)
2297 		*_parentID = lastParentID;
2298 
2299 	return B_OK;
2300 }
2301 
2302 
2303 static status_t
2304 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2305 	bool kernel, VnodePutter& _vnode, ino_t* _parentID, char* leafName)
2306 {
2307 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2308 		get_current_io_context(kernel), _vnode, _parentID, leafName);
2309 }
2310 
2311 
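/*!	Resolves \a path to a vnode. Absolute paths are resolved from the IO
	context's root, relative paths from its current working directory. On
	success, \a _vnode is set to the resolved node, holding a reference
	for the caller.
*/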
2312 static status_t
2313 path_to_vnode(char* path, bool traverseLink, VnodePutter& _vnode,
2314 	ino_t* _parentID, bool kernel)
2315 {
2316 	struct vnode* start = NULL;
2317 
2318 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2319 
2320 	if (!path)
2321 		return B_BAD_VALUE;
2322 
2323 	if (*path == '\0')
2324 		return B_ENTRY_NOT_FOUND;
2325 
2326 	// figure out if we need to start at root or at cwd
2327 	if (*path == '/') {
2328 		if (sRoot == NULL) {
2329 			// we're a bit early, aren't we?
2330 			return B_ERROR;
2331 		}
2332 
2333 		while (*++path == '/')
2334 			;
2335 		start = get_root_vnode(kernel);
2336 
2337 		if (*path == '\0') {
2338 			_vnode.SetTo(start);
2339 			return B_OK;
2340 		}
2341 
2342 	} else {
2343 		struct io_context* context = get_current_io_context(kernel);
2344 
2345 		mutex_lock(&context->io_mutex);
2346 		start = context->cwd;
2347 		if (start != NULL)
2348 			inc_vnode_ref_count(start);
2349 		mutex_unlock(&context->io_mutex);
2350 
2351 		if (start == NULL)
2352 			return B_ERROR;
2353 	}
2354 
2355 	return vnode_path_to_vnode(start, path, traverseLink, kernel, _vnode,
2356 		_parentID);
2357 }
2358 
2359 
2360 /*! Returns the vnode in the next to last segment of the path, and returns
2361 	the last portion in filename.
2362 	The path buffer must be able to store at least one additional character.
2363 */
2364 static status_t
2365 path_to_dir_vnode(char* path, VnodePutter& _vnode, char* filename,
2366 	bool kernel)
2367 {
2368 	status_t status = get_dir_path_and_leaf(path, filename);
2369 	if (status != B_OK)
2370 		return status;
2371 
2372 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2373 }
2374 
2375 
2376 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2377 		   to by a FD + path pair.
2378 
2379 	\a path must be given in either case. \a fd might be omitted, in which
2380 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2384 
2385 	The caller has the responsibility to call put_vnode() on the returned
2386 	directory vnode.
2387 
2388 	\param fd The FD. May be < 0.
2389 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2390 	       is modified by this function. It must have at least room for a
2391 	       string one character longer than the path it contains.
2392 	\param _vnode A pointer to a variable the directory vnode shall be written
2393 		   into.
2394 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2395 		   the leaf name of the specified entry will be written.
2396 	\param kernel \c true, if invoked from inside the kernel, \c false if
2397 		   invoked from userland.
2398 	\return \c B_OK, if everything went fine, another error code otherwise.
2399 */
2400 static status_t
2401 fd_and_path_to_dir_vnode(int fd, char* path, VnodePutter& _vnode,
2402 	char* filename, bool kernel)
2403 {
2404 	if (!path)
2405 		return B_BAD_VALUE;
2406 	if (*path == '\0')
2407 		return B_ENTRY_NOT_FOUND;
2408 	if (fd < 0)
2409 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2410 
2411 	status_t status = get_dir_path_and_leaf(path, filename);
2412 	if (status != B_OK)
2413 		return status;
2414 
2415 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2416 }
2417 
2418 
2419 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2420 		   to by a vnode + path pair.
2421 
2422 	\a path must be given in either case. \a vnode might be omitted, in which
2423 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2427 
2428 	The caller has the responsibility to call put_vnode() on the returned
2429 	directory vnode.
2430 
2431 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2432 	it is successful or not.
2433 
2434 	\param vnode The vnode. May be \c NULL.
2435 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2436 	       is modified by this function. It must have at least room for a
2437 	       string one character longer than the path it contains.
2438 	\param _vnode A pointer to a variable the directory vnode shall be written
2439 		   into.
2440 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2441 		   the leaf name of the specified entry will be written.
2442 	\param kernel \c true, if invoked from inside the kernel, \c false if
2443 		   invoked from userland.
2444 	\return \c B_OK, if everything went fine, another error code otherwise.
2445 */
2446 static status_t
2447 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2448 	VnodePutter& _vnode, char* filename, bool kernel)
2449 {
2450 	VnodePutter vnodePutter(vnode);
2451 
2452 	if (!path)
2453 		return B_BAD_VALUE;
2454 	if (*path == '\0')
2455 		return B_ENTRY_NOT_FOUND;
2456 	if (vnode == NULL || path[0] == '/')
2457 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2458 
2459 	status_t status = get_dir_path_and_leaf(path, filename);
2460 	if (status != B_OK)
2461 		return status;
2462 
2463 	vnodePutter.Detach();
2464 	return vnode_path_to_vnode(vnode, path, true, kernel, _vnode, NULL);
2465 }
2466 
2467 
2468 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2469 */
2470 static status_t
2471 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2472 	size_t bufferSize, struct io_context* ioContext)
2473 {
2474 	if (bufferSize < sizeof(struct dirent))
2475 		return B_BAD_VALUE;
2476 
2477 	// See if the vnode is covering another vnode and move to the covered
2478 	// vnode so we get the underlying file system
2479 	VnodePutter vnodePutter;
2480 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2481 		vnode = coveredVnode;
2482 		vnodePutter.SetTo(vnode);
2483 	}
2484 
2485 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2486 		// The FS supports getting the name of a vnode.
2487 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2488 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2489 			return B_OK;
2490 	}
2491 
2492 	// The FS doesn't support getting the name of a vnode. So we search the
2493 	// parent directory for the vnode, if the caller let us.
2494 
2495 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2496 		return B_UNSUPPORTED;
2497 
2498 	void* cookie;
2499 
2500 	status_t status = FS_CALL(parent, open_dir, &cookie);
2501 	if (status >= B_OK) {
2502 		while (true) {
2503 			uint32 num = 1;
2504 			// We use the FS hook directly instead of dir_read(), since we don't
2505 			// want the entries to be fixed. We have already resolved vnode to
2506 			// the covered node.
2507 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2508 				&num);
2509 			if (status != B_OK)
2510 				break;
2511 			if (num == 0) {
2512 				status = B_ENTRY_NOT_FOUND;
2513 				break;
2514 			}
2515 
2516 			if (vnode->id == buffer->d_ino) {
2517 				// found correct entry!
2518 				break;
2519 			}
2520 		}
2521 
2522 		FS_CALL(parent, close_dir, cookie);
2523 		FS_CALL(parent, free_dir_cookie, cookie);
2524 	}
2525 	return status;
2526 }
2527 
2528 
2529 static status_t
2530 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2531 	size_t nameSize, bool kernel)
2532 {
2533 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2534 	struct dirent* dirent = (struct dirent*)buffer;
2535 
2536 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2537 		get_current_io_context(kernel));
2538 	if (status != B_OK)
2539 		return status;
2540 
2541 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2542 		return B_BUFFER_OVERFLOW;
2543 
2544 	return B_OK;
2545 }
2546 
2547 
2548 /*!	Gets the full path to a given directory vnode.
2549 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2550 	file system doesn't support this call, it will fall back to iterating
2551 	through the parent directory to get the name of the child.
2552 
2553 	To protect against circular loops, it supports a maximum tree depth
2554 	of 256 levels.
2555 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to guarantee that the returned path
	is still valid, as paths aren't stable anyway: the path to a file can
	change at any time.
2559 
2560 	It might be a good idea, though, to check if the returned path exists
	in the calling function (it's not done here for efficiency reasons).
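
	Sketch of the algorithm: the buffer is filled right to left. Starting
	at the given \a vnode, each iteration resolves ".." to get the parent,
	looks up the child's name, and prepends "/<name>", until the IO
	context's root (or the global root) is reached.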
2562 */
2563 static status_t
2564 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2565 	bool kernel)
2566 {
2567 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2568 
2569 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2570 		return B_BAD_VALUE;
2571 
2572 	if (!S_ISDIR(vnode->Type()))
2573 		return B_NOT_A_DIRECTORY;
2574 
2575 	char* path = buffer;
2576 	int32 insert = bufferSize;
2577 	int32 maxLevel = 256;
2578 	int32 length;
2579 	status_t status = B_OK;
2580 	struct io_context* ioContext = get_current_io_context(kernel);
2581 
2582 	// we don't use get_vnode() here because this call is more
2583 	// efficient and does all we need from get_vnode()
2584 	inc_vnode_ref_count(vnode);
2585 
2586 	path[--insert] = '\0';
2587 		// the path is filled right to left
2588 
2589 	while (true) {
2590 		// If the node is the context's root, bail out. Otherwise resolve mount
2591 		// points.
2592 		if (vnode == ioContext->root)
2593 			break;
2594 
2595 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2596 			put_vnode(vnode);
2597 			vnode = coveredVnode;
2598 		}
2599 
2600 		// lookup the parent vnode
2601 		struct vnode* parentVnode;
2602 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2603 		if (status != B_OK)
2604 			goto out;
2605 
2606 		if (parentVnode == vnode) {
2607 			// The caller apparently got their hands on a node outside of their
2608 			// context's root. Now we've hit the global root.
2609 			put_vnode(parentVnode);
2610 			break;
2611 		}
2612 
2613 		// get the node's name
2614 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2615 			// also used for fs_read_dir()
2616 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2617 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2618 			sizeof(nameBuffer), ioContext);
2619 
2620 		// release the current vnode, we only need its parent from now on
2621 		put_vnode(vnode);
2622 		vnode = parentVnode;
2623 
2624 		if (status != B_OK)
2625 			goto out;
2626 
2627 		// TODO: add an explicit check for loops in about 10 levels to do
2628 		// real loop detection
2629 
		// don't go deeper than 'maxLevel' to prevent circular loops
2631 		if (maxLevel-- < 0) {
2632 			status = B_LINK_LIMIT;
2633 			goto out;
2634 		}
2635 
2636 		// add the name in front of the current path
2637 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2638 		length = strlen(name);
2639 		insert -= length;
2640 		if (insert <= 0) {
2641 			status = B_RESULT_NOT_REPRESENTABLE;
2642 			goto out;
2643 		}
2644 		memcpy(path + insert, name, length);
2645 		path[--insert] = '/';
2646 	}
2647 
2648 	// the root dir will result in an empty path: fix it
2649 	if (path[insert] == '\0')
2650 		path[--insert] = '/';
2651 
2652 	TRACE(("  path is: %s\n", path + insert));
2653 
2654 	// move the path to the start of the buffer
2655 	length = bufferSize - insert;
2656 	memmove(buffer, path + insert, length);
2657 
2658 out:
2659 	put_vnode(vnode);
2660 	return status;
2661 }
2662 
2663 
2664 /*!	Checks the length of every path component, and adds a '.'
2665 	if the path ends in a slash.
2666 	The given path buffer must be able to store at least one
2667 	additional character.
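
	For example (hypothetical input): "/a/b/" is completed to "/a/b/.",
	while "/a/b" is left untouched.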
2668 */
2669 static status_t
2670 check_path(char* to)
2671 {
2672 	int32 length = 0;
2673 
2674 	// check length of every path component
2675 
2676 	while (*to) {
2677 		char* begin;
2678 		if (*to == '/')
2679 			to++, length++;
2680 
2681 		begin = to;
2682 		while (*to != '/' && *to)
2683 			to++, length++;
2684 
2685 		if (to - begin > B_FILE_NAME_LENGTH)
2686 			return B_NAME_TOO_LONG;
2687 	}
2688 
2689 	if (length == 0)
2690 		return B_ENTRY_NOT_FOUND;
2691 
2692 	// complete path if there is a slash at the end
2693 
2694 	if (*(to - 1) == '/') {
2695 		if (length > B_PATH_NAME_LENGTH - 2)
2696 			return B_NAME_TOO_LONG;
2697 
2698 		to[0] = '.';
2699 		to[1] = '\0';
2700 	}
2701 
2702 	return B_OK;
2703 }
2704 
2705 
2706 static struct file_descriptor*
2707 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2708 {
2709 	struct file_descriptor* descriptor
2710 		= get_fd(get_current_io_context(kernel), fd);
2711 	if (descriptor == NULL)
2712 		return NULL;
2713 
2714 	struct vnode* vnode = fd_vnode(descriptor);
2715 	if (vnode == NULL) {
2716 		put_fd(descriptor);
2717 		return NULL;
2718 	}
2719 
2720 	// ToDo: when we can close a file descriptor at any point, investigate
2721 	//	if this is still valid to do (accessing the vnode without ref_count
2722 	//	or locking)
2723 	*_vnode = vnode;
2724 	return descriptor;
2725 }
2726 
2727 
2728 static struct vnode*
2729 get_vnode_from_fd(int fd, bool kernel)
2730 {
2731 	struct file_descriptor* descriptor;
2732 	struct vnode* vnode;
2733 
2734 	descriptor = get_fd(get_current_io_context(kernel), fd);
2735 	if (descriptor == NULL)
2736 		return NULL;
2737 
2738 	vnode = fd_vnode(descriptor);
2739 	if (vnode != NULL)
2740 		inc_vnode_ref_count(vnode);
2741 
2742 	put_fd(descriptor);
2743 	return vnode;
2744 }
2745 
2746 
2747 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2748 	only the path will be considered. In this case, the \a path must not be
2749 	NULL.
2750 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2751 	and should be NULL for files.
2752 */
2753 static status_t
2754 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2755 	VnodePutter& _vnode, ino_t* _parentID, bool kernel)
2756 {
2757 	if (fd < 0 && !path)
2758 		return B_BAD_VALUE;
2759 
2760 	if (path != NULL && *path == '\0')
2761 		return B_ENTRY_NOT_FOUND;
2762 
2763 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2764 		// no FD or absolute path
2765 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2766 	}
2767 
2768 	// FD only, or FD + relative path
2769 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2770 	if (vnode == NULL)
2771 		return B_FILE_ERROR;
2772 
2773 	if (path != NULL) {
2774 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, kernel,
2775 			_vnode, _parentID);
2776 	}
2777 
2778 	// there is no relative path to take into account
2779 
2780 	_vnode.SetTo(vnode);
2781 	if (_parentID)
2782 		*_parentID = -1;
2783 
2784 	return B_OK;
2785 }
2786 
2787 
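/*!	Allocates a new file descriptor of the given \a type, attaches \a vnode
	or \a mount plus \a cookie and \a openMode to it, and publishes it in
	the current IO context. Returns the new FD number on success, an error
	code otherwise. An O_CLOEXEC flag in \a openMode is translated into the
	context's close-on-exec bit for the returned FD.
*/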
2788 static int
2789 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2790 	void* cookie, int openMode, bool kernel)
2791 {
2792 	struct file_descriptor* descriptor;
2793 	int fd;
2794 
2795 	// If the vnode is locked, we don't allow creating a new file/directory
2796 	// file_descriptor for it
2797 	if (vnode && vnode->mandatory_locked_by != NULL
2798 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2799 		return B_BUSY;
2800 
2801 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2802 		return B_BAD_VALUE;
2803 
2804 	descriptor = alloc_fd();
2805 	if (!descriptor)
2806 		return B_NO_MEMORY;
2807 
2808 	if (vnode)
2809 		descriptor->u.vnode = vnode;
2810 	else
2811 		descriptor->u.mount = mount;
2812 	descriptor->cookie = cookie;
2813 
2814 	switch (type) {
2815 		// vnode types
2816 		case FDTYPE_FILE:
2817 			descriptor->ops = &sFileOps;
2818 			break;
2819 		case FDTYPE_DIR:
2820 			descriptor->ops = &sDirectoryOps;
2821 			break;
2822 		case FDTYPE_ATTR:
2823 			descriptor->ops = &sAttributeOps;
2824 			break;
2825 		case FDTYPE_ATTR_DIR:
2826 			descriptor->ops = &sAttributeDirectoryOps;
2827 			break;
2828 
2829 		// mount types
2830 		case FDTYPE_INDEX_DIR:
2831 			descriptor->ops = &sIndexDirectoryOps;
2832 			break;
2833 		case FDTYPE_QUERY:
2834 			descriptor->ops = &sQueryOps;
2835 			break;
2836 
2837 		default:
2838 			panic("get_new_fd() called with unknown type %d\n", type);
2839 			break;
2840 	}
2841 	descriptor->type = type;
2842 	descriptor->open_mode = openMode;
2843 
2844 	if (descriptor->ops->fd_seek != NULL) {
2845 		// some kinds of files are not seekable
2846 		switch (vnode->Type() & S_IFMT) {
2847 			case S_IFIFO:
2848 			case S_IFSOCK:
2849 				ASSERT(descriptor->pos == -1);
2850 				break;
2851 
			// The Open Group Base Specs don't single out any file types
			// besides pipes, FIFOs, and sockets, so we allow seeking all
			// others.
2854 			default:
2855 				descriptor->pos = 0;
2856 				break;
2857 		}
2858 	}
2859 
2860 	io_context* context = get_current_io_context(kernel);
2861 	fd = new_fd(context, descriptor);
2862 	if (fd < 0) {
2863 		descriptor->ops = NULL;
2864 		put_fd(descriptor);
2865 		return B_NO_MORE_FDS;
2866 	}
2867 
2868 	mutex_lock(&context->io_mutex);
2869 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2870 	mutex_unlock(&context->io_mutex);
2871 
2872 	return fd;
2873 }
2874 
2875 
2876 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2877 	vfs_normalize_path(). See there for more documentation.
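
	Sketch of the loop: in each of at most B_MAX_SYMLINKS iterations, the
	path is split into directory and leaf. If the leaf is a symlink (and
	\a traverseLink is \c true), its target becomes the new path to
	process; otherwise the result is the directory's absolute path plus
	the leaf.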
2878 */
2879 static status_t
2880 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2881 {
2882 	VnodePutter dir;
2883 	status_t error;
2884 
2885 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2886 		// get dir vnode + leaf name
2887 		char leaf[B_FILE_NAME_LENGTH];
2888 		error = vnode_and_path_to_dir_vnode(dir.Detach(), path, dir, leaf, kernel);
2889 		if (error != B_OK)
2890 			return error;
2891 		strcpy(path, leaf);
2892 
2893 		// get file vnode, if we shall resolve links
2894 		bool fileExists = false;
2895 		VnodePutter fileVnode;
2896 		if (traverseLink) {
2897 			inc_vnode_ref_count(dir.Get());
2898 			if (vnode_path_to_vnode(dir.Get(), path, false, kernel, fileVnode,
2899 					NULL) == B_OK) {
2900 				fileExists = true;
2901 			}
2902 		}
2903 
2904 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2905 			// we're done -- construct the path
2906 			bool hasLeaf = true;
2907 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2908 				// special cases "." and ".." -- get the dir, forget the leaf
2909 				error = vnode_path_to_vnode(dir.Detach(), leaf, false, kernel,
2910 					dir, NULL);
2911 				if (error != B_OK)
2912 					return error;
2913 				hasLeaf = false;
2914 			}
2915 
2916 			// get the directory path
2917 			error = dir_vnode_to_path(dir.Get(), path, B_PATH_NAME_LENGTH, kernel);
2918 			if (error != B_OK)
2919 				return error;
2920 
2921 			// append the leaf name
2922 			if (hasLeaf) {
2923 				// insert a directory separator if this is not the file system
2924 				// root
2925 				if ((strcmp(path, "/") != 0
2926 					&& strlcat(path, "/", pathSize) >= pathSize)
2927 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2928 					return B_NAME_TOO_LONG;
2929 				}
2930 			}
2931 
2932 			return B_OK;
2933 		}
2934 
2935 		// read link
2936 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2937 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2938 			error = FS_CALL(fileVnode.Get(), read_symlink, path, &bufferSize);
2939 			if (error != B_OK)
2940 				return error;
2941 			if (bufferSize < B_PATH_NAME_LENGTH)
2942 				path[bufferSize] = '\0';
2943 		} else
2944 			return B_BAD_VALUE;
2945 	}
2946 
2947 	return B_LINK_LIMIT;
2948 }
2949 
2950 
2951 static status_t
2952 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2953 	struct io_context* ioContext)
2954 {
2955 	// Make sure the IO context root is not bypassed.
2956 	if (parent == ioContext->root) {
2957 		*_device = parent->device;
2958 		*_node = parent->id;
2959 		return B_OK;
2960 	}
2961 
2962 	inc_vnode_ref_count(parent);
2963 		// vnode_path_to_vnode() puts the node
2964 
2965 	// ".." is guaranteed not to be clobbered by this call
2966 	VnodePutter vnode;
2967 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false,
2968 		ioContext, vnode, NULL);
2969 	if (status == B_OK) {
2970 		*_device = vnode->device;
2971 		*_node = vnode->id;
2972 	}
2973 
2974 	return status;
2975 }
2976 
2977 
2978 #ifdef ADD_DEBUGGER_COMMANDS
2979 
2980 
2981 static void
2982 _dump_advisory_locking(advisory_locking* locking)
2983 {
2984 	if (locking == NULL)
2985 		return;
2986 
	kprintf("   lock:        %" B_PRId32, locking->lock);
	kprintf("   wait_sem:    %" B_PRId32 "\n", locking->wait_sem);
2989 
2990 	int32 index = 0;
2991 	LockList::Iterator iterator = locking->locks.GetIterator();
2992 	while (iterator.HasNext()) {
2993 		struct advisory_lock* lock = iterator.Next();
2994 
2995 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2996 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2997 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2998 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2999 	}
3000 }
3001 
3002 
3003 static void
3004 _dump_mount(struct fs_mount* mount)
3005 {
3006 	kprintf("MOUNT: %p\n", mount);
3007 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3008 	kprintf(" device_name:   %s\n", mount->device_name);
3009 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3010 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3011 	kprintf(" partition:     %p\n", mount->partition);
3012 	kprintf(" lock:          %p\n", &mount->lock);
3013 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3014 		mount->owns_file_device ? " owns_file_device" : "");
3015 
3016 	fs_volume* volume = mount->volume;
3017 	while (volume != NULL) {
3018 		kprintf(" volume %p:\n", volume);
3019 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3020 		kprintf("  private_volume:   %p\n", volume->private_volume);
3021 		kprintf("  ops:              %p\n", volume->ops);
3022 		kprintf("  file_system:      %p\n", volume->file_system);
3023 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3024 		volume = volume->super_volume;
3025 	}
3026 
3027 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3028 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3029 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3030 	set_debug_variable("_partition", (addr_t)mount->partition);
3031 }
3032 
3033 
3034 static bool
3035 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3036 	const char* name)
3037 {
3038 	bool insertSlash = buffer[bufferSize] != '\0';
3039 	size_t nameLength = strlen(name);
3040 
3041 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3042 		return false;
3043 
3044 	if (insertSlash)
3045 		buffer[--bufferSize] = '/';
3046 
3047 	bufferSize -= nameLength;
3048 	memcpy(buffer + bufferSize, name, nameLength);
3049 
3050 	return true;
3051 }
3052 
3053 
3054 static bool
3055 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3056 	ino_t nodeID)
3057 {
3058 	if (bufferSize == 0)
3059 		return false;
3060 
3061 	bool insertSlash = buffer[bufferSize] != '\0';
3062 	if (insertSlash)
3063 		buffer[--bufferSize] = '/';
3064 
3065 	size_t size = snprintf(buffer, bufferSize,
3066 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3067 	if (size > bufferSize) {
3068 		if (insertSlash)
3069 			bufferSize++;
3070 		return false;
3071 	}
3072 
3073 	if (size < bufferSize)
3074 		memmove(buffer + bufferSize - size, buffer, size);
3075 
3076 	bufferSize -= size;
3077 	return true;
3078 }
3079 
3080 
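/*!	Debugger-safe, best-effort path resolution for \a vnode: it only walks
	up via the entry cache's reverse lookup, without taking locks or
	allocating memory, so it can be used from KDL. Components that can't
	be resolved are rendered as "<dev,node>".
*/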
3081 static char*
3082 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3083 	bool& _truncated)
3084 {
3085 	// null-terminate the path
3086 	buffer[--bufferSize] = '\0';
3087 
3088 	while (true) {
3089 		while (vnode->covers != NULL)
3090 			vnode = vnode->covers;
3091 
3092 		if (vnode == sRoot) {
3093 			_truncated = bufferSize == 0;
3094 			if (!_truncated)
3095 				buffer[--bufferSize] = '/';
3096 			return buffer + bufferSize;
3097 		}
3098 
3099 		// resolve the name
3100 		ino_t dirID;
3101 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3102 			vnode->id, dirID);
3103 		if (name == NULL) {
3104 			// Failed to resolve the name -- prepend "<dev,node>/".
3105 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3106 				vnode->mount->id, vnode->id);
3107 			return buffer + bufferSize;
3108 		}
3109 
3110 		// prepend the name
3111 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3112 			_truncated = true;
3113 			return buffer + bufferSize;
3114 		}
3115 
3116 		// resolve the directory node
3117 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3118 		if (nextVnode == NULL) {
3119 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3120 				vnode->mount->id, dirID);
3121 			return buffer + bufferSize;
3122 		}
3123 
3124 		vnode = nextVnode;
3125 	}
3126 }
3127 
3128 
3129 static void
3130 _dump_vnode(struct vnode* vnode, bool printPath)
3131 {
3132 	kprintf("VNODE: %p\n", vnode);
3133 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3134 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3135 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3136 	kprintf(" private_node:  %p\n", vnode->private_node);
3137 	kprintf(" mount:         %p\n", vnode->mount);
3138 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3139 	kprintf(" covers:        %p\n", vnode->covers);
3140 	kprintf(" cache:         %p\n", vnode->cache);
3141 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3142 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3143 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3144 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3145 
3146 	_dump_advisory_locking(vnode->advisory_locking);
3147 
3148 	if (printPath) {
3149 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3150 		if (buffer != NULL) {
3151 			bool truncated;
3152 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3153 				B_PATH_NAME_LENGTH, truncated);
3154 			if (path != NULL) {
3155 				kprintf(" path:          ");
3156 				if (truncated)
3157 					kputs("<truncated>/");
3158 				kputs(path);
3159 				kputs("\n");
3160 			} else
3161 				kprintf("Failed to resolve vnode path.\n");
3162 
3163 			debug_free(buffer);
3164 		} else
3165 			kprintf("Failed to allocate memory for constructing the path.\n");
3166 	}
3167 
3168 	set_debug_variable("_node", (addr_t)vnode->private_node);
3169 	set_debug_variable("_mount", (addr_t)vnode->mount);
3170 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3171 	set_debug_variable("_covers", (addr_t)vnode->covers);
3172 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3173 }
3174 
3175 
3176 static int
3177 dump_mount(int argc, char** argv)
3178 {
3179 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3180 		kprintf("usage: %s [id|address]\n", argv[0]);
3181 		return 0;
3182 	}
3183 
3184 	ulong val = parse_expression(argv[1]);
3185 	uint32 id = val;
3186 
3187 	struct fs_mount* mount = sMountsTable->Lookup(id);
3188 	if (mount == NULL) {
3189 		if (IS_USER_ADDRESS(id)) {
3190 			kprintf("fs_mount not found\n");
3191 			return 0;
3192 		}
3193 		mount = (fs_mount*)val;
3194 	}
3195 
3196 	_dump_mount(mount);
3197 	return 0;
3198 }
3199 
3200 
3201 static int
3202 dump_mounts(int argc, char** argv)
3203 {
3204 	if (argc != 1) {
3205 		kprintf("usage: %s\n", argv[0]);
3206 		return 0;
3207 	}
3208 
3209 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3210 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3211 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3212 
3213 	struct fs_mount* mount;
3214 
3215 	MountTable::Iterator iterator(sMountsTable);
3216 	while (iterator.HasNext()) {
3217 		mount = iterator.Next();
3218 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3219 			mount->root_vnode->covers, mount->volume->private_volume,
3220 			mount->volume->file_system_name);
3221 
3222 		fs_volume* volume = mount->volume;
3223 		while (volume->super_volume != NULL) {
3224 			volume = volume->super_volume;
3225 			kprintf("                                     %p %s\n",
3226 				volume->private_volume, volume->file_system_name);
3227 		}
3228 	}
3229 
3230 	return 0;
3231 }
3232 
3233 
3234 static int
3235 dump_vnode(int argc, char** argv)
3236 {
3237 	bool printPath = false;
3238 	int argi = 1;
3239 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3240 		printPath = true;
3241 		argi++;
3242 	}
3243 
3244 	if (argi >= argc || argi + 2 < argc) {
3245 		print_debugger_command_usage(argv[0]);
3246 		return 0;
3247 	}
3248 
3249 	struct vnode* vnode = NULL;
3250 
3251 	if (argi + 1 == argc) {
3252 		vnode = (struct vnode*)parse_expression(argv[argi]);
3253 		if (IS_USER_ADDRESS(vnode)) {
3254 			kprintf("invalid vnode address\n");
3255 			return 0;
3256 		}
3257 		_dump_vnode(vnode, printPath);
3258 		return 0;
3259 	}
3260 
3261 	dev_t device = parse_expression(argv[argi]);
3262 	ino_t id = parse_expression(argv[argi + 1]);
3263 
3264 	VnodeTable::Iterator iterator(sVnodeTable);
3265 	while (iterator.HasNext()) {
3266 		vnode = iterator.Next();
3267 		if (vnode->id != id || vnode->device != device)
3268 			continue;
3269 
3270 		_dump_vnode(vnode, printPath);
3271 	}
3272 
3273 	return 0;
3274 }
3275 
3276 
3277 static int
3278 dump_vnodes(int argc, char** argv)
3279 {
3280 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3281 		kprintf("usage: %s [device]\n", argv[0]);
3282 		return 0;
3283 	}
3284 
3285 	// restrict dumped nodes to a certain device if requested
3286 	dev_t device = parse_expression(argv[1]);
3287 
3288 	struct vnode* vnode;
3289 
3290 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3291 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3292 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3293 
3294 	VnodeTable::Iterator iterator(sVnodeTable);
3295 	while (iterator.HasNext()) {
3296 		vnode = iterator.Next();
3297 		if (vnode->device != device)
3298 			continue;
3299 
3300 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3301 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3302 			vnode->private_node, vnode->advisory_locking,
3303 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3304 			vnode->IsUnpublished() ? "u" : "-");
3305 	}
3306 
3307 	return 0;
3308 }
3309 
3310 
3311 static int
3312 dump_vnode_caches(int argc, char** argv)
3313 {
3314 	struct vnode* vnode;
3315 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3317 		kprintf("usage: %s [device]\n", argv[0]);
3318 		return 0;
3319 	}
3320 
3321 	// restrict dumped nodes to a certain device if requested
3322 	dev_t device = -1;
3323 	if (argc > 1)
3324 		device = parse_expression(argv[1]);
3325 
3326 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3327 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3328 
3329 	VnodeTable::Iterator iterator(sVnodeTable);
3330 	while (iterator.HasNext()) {
3331 		vnode = iterator.Next();
3332 		if (vnode->cache == NULL)
3333 			continue;
3334 		if (device != -1 && vnode->device != device)
3335 			continue;
3336 
3337 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3338 			vnode, vnode->device, vnode->id, vnode->cache,
3339 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3340 			vnode->cache->page_count);
3341 	}
3342 
3343 	return 0;
3344 }
3345 
3346 
3347 int
3348 dump_io_context(int argc, char** argv)
3349 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3351 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3352 		return 0;
3353 	}
3354 
3355 	struct io_context* context = NULL;
3356 
3357 	if (argc > 1) {
3358 		ulong num = parse_expression(argv[1]);
3359 		if (IS_KERNEL_ADDRESS(num))
3360 			context = (struct io_context*)num;
3361 		else {
3362 			Team* team = team_get_team_struct_locked(num);
3363 			if (team == NULL) {
3364 				kprintf("could not find team with ID %lu\n", num);
3365 				return 0;
3366 			}
3367 			context = (struct io_context*)team->io_context;
3368 		}
3369 	} else
3370 		context = get_current_io_context(true);
3371 
3372 	kprintf("I/O CONTEXT: %p\n", context);
3373 	kprintf(" root vnode:\t%p\n", context->root);
3374 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3375 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3376 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3377 
3378 	if (context->num_used_fds) {
3379 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3380 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3381 	}
3382 
3383 	for (uint32 i = 0; i < context->table_size; i++) {
3384 		struct file_descriptor* fd = context->fds[i];
3385 		if (fd == NULL)
3386 			continue;
3387 
3388 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3389 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3390 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3391 			fd->pos, fd->cookie,
3392 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3393 				? "mount" : "vnode",
3394 			fd->u.vnode);
3395 	}
3396 
3397 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3398 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3399 
3400 	set_debug_variable("_cwd", (addr_t)context->cwd);
3401 
3402 	return 0;
3403 }
3404 
3405 
3406 int
3407 dump_vnode_usage(int argc, char** argv)
3408 {
3409 	if (argc != 1) {
3410 		kprintf("usage: %s\n", argv[0]);
3411 		return 0;
3412 	}
3413 
3414 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3415 		sUnusedVnodes, kMaxUnusedVnodes);
3416 
3417 	uint32 count = sVnodeTable->CountElements();
3418 
3419 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3420 		count - sUnusedVnodes);
3421 	return 0;
3422 }
3423 
3424 #endif	// ADD_DEBUGGER_COMMANDS
3425 
3426 
3427 /*!	Clears memory specified by an iovec array.
3428 */
3429 static void
3430 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3431 {
3432 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3433 		size_t length = std::min(vecs[i].iov_len, bytes);
3434 		memset(vecs[i].iov_base, 0, length);
3435 		bytes -= length;
3436 	}
3437 }
3438 
3439 
3440 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3441 	and calls the file system hooks to read/write the request to disk.
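
	The file_io_vecs describe where the file's data lives on disk (an
	offset of -1 denotes a sparse chunk, which reads back as zeroes and
	must never be written), while the iovecs describe the memory to
	transfer; this function pairs the two up chunk by chunk.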
3442 */
3443 static status_t
3444 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3445 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3446 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3447 	bool doWrite)
3448 {
3449 	if (fileVecCount == 0) {
3450 		// There are no file vecs at this offset, so we're obviously trying
3451 		// to access the file outside of its bounds
3452 		return B_BAD_VALUE;
3453 	}
3454 
3455 	size_t numBytes = *_numBytes;
3456 	uint32 fileVecIndex;
3457 	size_t vecOffset = *_vecOffset;
3458 	uint32 vecIndex = *_vecIndex;
3459 	status_t status;
3460 	size_t size;
3461 
3462 	if (!doWrite && vecOffset == 0) {
3463 		// now directly read the data from the device
3464 		// the first file_io_vec can be read directly
3465 		// TODO: we could also write directly
3466 
3467 		if (fileVecs[0].length < (off_t)numBytes)
3468 			size = fileVecs[0].length;
3469 		else
3470 			size = numBytes;
3471 
3472 		if (fileVecs[0].offset >= 0) {
3473 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3474 				&vecs[vecIndex], vecCount - vecIndex, &size);
3475 		} else {
3476 			// sparse read
3477 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3478 			status = B_OK;
3479 		}
3480 		if (status != B_OK)
3481 			return status;
3482 
3483 		ASSERT((off_t)size <= fileVecs[0].length);
3484 
3485 		// If the file portion was contiguous, we're already done now
3486 		if (size == numBytes)
3487 			return B_OK;
3488 
3489 		// if we reached the end of the file, we can return as well
3490 		if ((off_t)size != fileVecs[0].length) {
3491 			*_numBytes = size;
3492 			return B_OK;
3493 		}
3494 
3495 		fileVecIndex = 1;
3496 
3497 		// first, find out where we have to continue in our iovecs
3498 		for (; vecIndex < vecCount; vecIndex++) {
3499 			if (size < vecs[vecIndex].iov_len)
3500 				break;
3501 
3502 			size -= vecs[vecIndex].iov_len;
3503 		}
3504 
3505 		vecOffset = size;
3506 	} else {
3507 		fileVecIndex = 0;
3508 		size = 0;
3509 	}
3510 
3511 	// Too bad, let's process the rest of the file_io_vecs
3512 
3513 	size_t totalSize = size;
3514 	size_t bytesLeft = numBytes - size;
3515 
3516 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3517 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3518 		off_t fileOffset = fileVec.offset;
3519 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3520 
3521 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3522 			fileLeft));
3523 
3524 		// process the complete fileVec
3525 		while (fileLeft > 0) {
3526 			iovec tempVecs[MAX_TEMP_IO_VECS];
3527 			uint32 tempCount = 0;
3528 
3529 			// size tracks how much of what is left of the current fileVec
3530 			// (fileLeft) has been assigned to tempVecs
3531 			size = 0;
3532 
3533 			// assign what is left of the current fileVec to the tempVecs
3534 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3535 					&& tempCount < MAX_TEMP_IO_VECS;) {
3536 				// try to satisfy one iovec per iteration (or as much as
3537 				// possible)
3538 
3539 				// bytes left of the current iovec
3540 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3541 				if (vecLeft == 0) {
3542 					vecOffset = 0;
3543 					vecIndex++;
3544 					continue;
3545 				}
3546 
3547 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3548 					vecIndex, vecOffset, size));
3549 
3550 				// actually available bytes
3551 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3552 
3553 				tempVecs[tempCount].iov_base
3554 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3555 				tempVecs[tempCount].iov_len = tempVecSize;
3556 				tempCount++;
3557 
3558 				size += tempVecSize;
3559 				vecOffset += tempVecSize;
3560 			}
3561 
3562 			size_t bytes = size;
3563 
3564 			if (fileOffset == -1) {
3565 				if (doWrite) {
3566 					panic("sparse write attempt: vnode %p", vnode);
3567 					status = B_IO_ERROR;
3568 				} else {
3569 					// sparse read
3570 					zero_iovecs(tempVecs, tempCount, bytes);
3571 					status = B_OK;
3572 				}
3573 			} else if (doWrite) {
3574 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3575 					tempVecs, tempCount, &bytes);
3576 			} else {
3577 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3578 					tempVecs, tempCount, &bytes);
3579 			}
3580 			if (status != B_OK)
3581 				return status;
3582 
3583 			totalSize += bytes;
3584 			bytesLeft -= size;
3585 			if (fileOffset >= 0)
3586 				fileOffset += size;
3587 			fileLeft -= size;
3588 			//dprintf("-> file left = %Lu\n", fileLeft);
3589 
3590 			if (size != bytes || vecIndex >= vecCount) {
3591 				// there are no more bytes or iovecs, let's bail out
3592 				*_numBytes = totalSize;
3593 				return B_OK;
3594 			}
3595 		}
3596 	}
3597 
3598 	*_vecIndex = vecIndex;
3599 	*_vecOffset = vecOffset;
3600 	*_numBytes = totalSize;
3601 	return B_OK;
3602 }
3603 
3604 
3605 static bool
3606 is_user_in_group(gid_t gid)
3607 {
3608 	if (gid == getegid())
3609 		return true;
3610 
3611 	gid_t groups[NGROUPS_MAX];
3612 	int groupCount = getgroups(NGROUPS_MAX, groups);
3613 	for (int i = 0; i < groupCount; i++) {
3614 		if (gid == groups[i])
3615 			return true;
3616 	}
3617 
3618 	return false;
3619 }
3620 
3621 
3622 static status_t
3623 free_io_context(io_context* context)
3624 {
3625 	uint32 i;
3626 
3627 	TIOC(FreeIOContext(context));
3628 
3629 	if (context->root)
3630 		put_vnode(context->root);
3631 
3632 	if (context->cwd)
3633 		put_vnode(context->cwd);
3634 
3635 	mutex_lock(&context->io_mutex);
3636 
3637 	for (i = 0; i < context->table_size; i++) {
3638 		if (struct file_descriptor* descriptor = context->fds[i]) {
3639 			close_fd(context, descriptor);
3640 			put_fd(descriptor);
3641 		}
3642 	}
3643 
3644 	mutex_destroy(&context->io_mutex);
3645 
3646 	remove_node_monitors(context);
3647 	free(context->fds);
3648 	free(context);
3649 
3650 	return B_OK;
3651 }
3652 
3653 
3654 static status_t
3655 resize_monitor_table(struct io_context* context, const int newSize)
3656 {
3657 	int	status = B_OK;
3658 
3659 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3660 		return B_BAD_VALUE;
3661 
3662 	mutex_lock(&context->io_mutex);
3663 
3664 	if ((size_t)newSize < context->num_monitors) {
3665 		status = B_BUSY;
3666 		goto out;
3667 	}
3668 	context->max_monitors = newSize;
3669 
3670 out:
3671 	mutex_unlock(&context->io_mutex);
3672 	return status;
3673 }
3674 
3675 
3676 //	#pragma mark - public API for file systems
3677 
3678 
3679 extern "C" status_t
3680 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3681 	fs_vnode_ops* ops)
3682 {
3683 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3684 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3685 
3686 	if (privateNode == NULL)
3687 		return B_BAD_VALUE;
3688 
3689 	int32 tries = BUSY_VNODE_RETRIES;
3690 restart:
3691 	// create the node
3692 	bool nodeCreated;
3693 	struct vnode* vnode;
3694 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3695 		nodeCreated);
3696 	if (status != B_OK)
3697 		return status;
3698 
3699 	WriteLocker nodeLocker(sVnodeLock, true);
3700 		// create_new_vnode_and_lock() has locked for us
3701 
3702 	if (!nodeCreated && vnode->IsBusy()) {
3703 		nodeLocker.Unlock();
3704 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3705 			return B_BUSY;
3706 		goto restart;
3707 	}
3708 
3709 	// file system integrity check:
3710 	// test if the vnode already exists and bail out if this is the case!
3711 	if (!nodeCreated) {
3712 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3713 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3714 			vnode->private_node);
3715 		return B_ERROR;
3716 	}
3717 
3718 	vnode->private_node = privateNode;
3719 	vnode->ops = ops;
3720 	vnode->SetUnpublished(true);
3721 
3722 	TRACE(("returns: %s\n", strerror(status)));
3723 
3724 	return status;
3725 }
3726 
3727 
3728 extern "C" status_t
3729 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3730 	fs_vnode_ops* ops, int type, uint32 flags)
3731 {
3732 	FUNCTION(("publish_vnode()\n"));
3733 
3734 	int32 tries = BUSY_VNODE_RETRIES;
3735 restart:
3736 	WriteLocker locker(sVnodeLock);
3737 
3738 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3739 
3740 	bool nodeCreated = false;
3741 	if (vnode == NULL) {
3742 		if (privateNode == NULL)
3743 			return B_BAD_VALUE;
3744 
3745 		// create the node
3746 		locker.Unlock();
3747 			// create_new_vnode_and_lock() will re-lock for us on success
3748 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3749 			nodeCreated);
3750 		if (status != B_OK)
3751 			return status;
3752 
3753 		locker.SetTo(sVnodeLock, true);
3754 	}
3755 
3756 	if (nodeCreated) {
3757 		vnode->private_node = privateNode;
3758 		vnode->ops = ops;
3759 		vnode->SetUnpublished(true);
3760 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3761 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3762 		// already known, but not published
3763 	} else if (vnode->IsBusy()) {
3764 		locker.Unlock();
3765 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3766 			return B_BUSY;
3767 		goto restart;
3768 	} else
3769 		return B_BAD_VALUE;
3770 
3771 	bool publishSpecialSubNode = false;
3772 
3773 	vnode->SetType(type);
3774 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3775 	publishSpecialSubNode = is_special_node_type(type)
3776 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3777 
3778 	status_t status = B_OK;
3779 
3780 	// create sub vnodes, if necessary
3781 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3782 		locker.Unlock();
3783 
3784 		fs_volume* subVolume = volume;
3785 		if (volume->sub_volume != NULL) {
3786 			while (status == B_OK && subVolume->sub_volume != NULL) {
3787 				subVolume = subVolume->sub_volume;
3788 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3789 					vnode);
3790 			}
3791 		}
3792 
3793 		if (status == B_OK && publishSpecialSubNode)
3794 			status = create_special_sub_node(vnode, flags);
3795 
3796 		if (status != B_OK) {
3797 			// error -- clean up the created sub vnodes
3798 			while (subVolume->super_volume != volume) {
3799 				subVolume = subVolume->super_volume;
3800 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3801 			}
3802 		}
3803 
3804 		if (status == B_OK) {
3805 			ReadLocker vnodesReadLocker(sVnodeLock);
3806 			AutoLocker<Vnode> nodeLocker(vnode);
3807 			vnode->SetBusy(false);
3808 			vnode->SetUnpublished(false);
3809 		} else {
3810 			locker.Lock();
3811 			sVnodeTable->Remove(vnode);
3812 			remove_vnode_from_mount_list(vnode, vnode->mount);
3813 			object_cache_free(sVnodeCache, vnode, 0);
3814 		}
3815 	} else {
3816 		// we still hold the write lock -- mark the node unbusy and published
3817 		vnode->SetBusy(false);
3818 		vnode->SetUnpublished(false);
3819 	}
3820 
3821 	TRACE(("returns: %s\n", strerror(status)));
3822 
3823 	return status;
3824 }
3825 
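// A minimal usage sketch (illustrative -- "gInodeOps" and the surrounding
// hook are assumptions, not part of this API): a file system creating a
// fresh node typically pairs the two calls above, e.g. in its create() hook:
//
//	status_t status = new_vnode(volume, inodeID, inode, &gInodeOps);
//	if (status == B_OK) {
//		status = publish_vnode(volume, inodeID, inode, &gInodeOps,
//			S_IFREG | 0644, 0);
//	}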
3826 
3827 extern "C" status_t
3828 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3829 {
3830 	struct vnode* vnode;
3831 
3832 	if (volume == NULL)
3833 		return B_BAD_VALUE;
3834 
3835 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3836 	if (status != B_OK)
3837 		return status;
3838 
3839 	// If this is a layered FS, we need to get the node cookie for the requested
3840 	// layer.
3841 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3842 		fs_vnode resolvedNode;
3843 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3844 			&resolvedNode);
3845 		if (status != B_OK) {
3846 			panic("get_vnode(): Failed to get super node for vnode %p, "
3847 				"volume: %p", vnode, volume);
3848 			put_vnode(vnode);
3849 			return status;
3850 		}
3851 
3852 		if (_privateNode != NULL)
3853 			*_privateNode = resolvedNode.private_node;
3854 	} else if (_privateNode != NULL)
3855 		*_privateNode = vnode->private_node;
3856 
3857 	return B_OK;
3858 }
3859 
3860 
3861 extern "C" status_t
3862 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3863 {
3864 	ReadLocker nodeLocker(sVnodeLock);
3865 
3866 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3867 	if (vnode == NULL)
3868 		return B_BAD_VALUE;
3869 
3870 	inc_vnode_ref_count(vnode);
3871 	return B_OK;
3872 }
3873 
3874 
3875 extern "C" status_t
3876 put_vnode(fs_volume* volume, ino_t vnodeID)
3877 {
3878 	struct vnode* vnode;
3879 
3880 	rw_lock_read_lock(&sVnodeLock);
3881 	vnode = lookup_vnode(volume->id, vnodeID);
3882 	rw_lock_read_unlock(&sVnodeLock);
3883 
3884 	if (vnode == NULL)
3885 		return B_BAD_VALUE;
3886 
3887 	dec_vnode_ref_count(vnode, false, true);
3888 	return B_OK;
3889 }
3890 
3891 
3892 extern "C" status_t
3893 remove_vnode(fs_volume* volume, ino_t vnodeID)
3894 {
3895 	ReadLocker locker(sVnodeLock);
3896 
3897 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3898 	if (vnode == NULL)
3899 		return B_ENTRY_NOT_FOUND;
3900 
3901 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3902 		// this vnode is in use
3903 		return B_BUSY;
3904 	}
3905 
3906 	vnode->Lock();
3907 
3908 	vnode->SetRemoved(true);
3909 	bool removeUnpublished = false;
3910 
3911 	if (vnode->IsUnpublished()) {
3912 		// prepare the vnode for deletion
3913 		removeUnpublished = true;
3914 		vnode->SetBusy(true);
3915 	}
3916 
3917 	vnode->Unlock();
3918 	locker.Unlock();
3919 
3920 	if (removeUnpublished) {
3921 		// If the vnode hasn't been published yet, we delete it here
3922 		atomic_add(&vnode->ref_count, -1);
3923 		free_vnode(vnode, true);
3924 	}
3925 
3926 	return B_OK;
3927 }
3928 
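// Usage sketch (illustrative -- "inode" and its methods are made up): a file
// system would typically call remove_vnode() from its unlink() hook once the
// last entry referring to the node is gone, so that the node gets deleted
// when the last reference to it is released:
//
//	if (inode->LinkCount() == 0)
//		remove_vnode(volume, inode->ID());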
3929 
3930 extern "C" status_t
3931 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3932 {
3933 	struct vnode* vnode;
3934 
3935 	rw_lock_read_lock(&sVnodeLock);
3936 
3937 	vnode = lookup_vnode(volume->id, vnodeID);
3938 	if (vnode) {
3939 		AutoLocker<Vnode> nodeLocker(vnode);
3940 		vnode->SetRemoved(false);
3941 	}
3942 
3943 	rw_lock_read_unlock(&sVnodeLock);
3944 	return B_OK;
3945 }
3946 
3947 
3948 extern "C" status_t
3949 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3950 {
3951 	ReadLocker _(sVnodeLock);
3952 
3953 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3954 		if (_removed != NULL)
3955 			*_removed = vnode->IsRemoved();
3956 		return B_OK;
3957 	}
3958 
3959 	return B_BAD_VALUE;
3960 }
3961 
3962 
3963 extern "C" fs_volume*
3964 volume_for_vnode(fs_vnode* _vnode)
3965 {
3966 	if (_vnode == NULL)
3967 		return NULL;
3968 
3969 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3970 	return vnode->mount->volume;
3971 }
3972 
3973 
3974 extern "C" status_t
3975 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3976 	uid_t nodeUserID)
3977 {
3978 	// get node permissions
3979 	int userPermissions = (mode & S_IRWXU) >> 6;
3980 	int groupPermissions = (mode & S_IRWXG) >> 3;
3981 	int otherPermissions = mode & S_IRWXO;
3982 
3983 	// get the node permissions for this uid/gid
3984 	int permissions = 0;
3985 	uid_t uid = geteuid();
3986 
3987 	if (uid == 0) {
3988 		// user is root
3989 		// root always has read/write permission, but at least one of the
3990 		// X bits must be set for execute permission
3991 		permissions = userPermissions | groupPermissions | otherPermissions
3992 			| S_IROTH | S_IWOTH;
3993 		if (S_ISDIR(mode))
3994 			permissions |= S_IXOTH;
3995 	} else if (uid == nodeUserID) {
3996 		// user is node owner
3997 		permissions = userPermissions;
3998 	} else if (is_user_in_group(nodeGroupID)) {
3999 		// user is in owning group
4000 		permissions = groupPermissions;
4001 	} else {
4002 		// user is one of the others
4003 		permissions = otherPermissions;
4004 	}
4005 
4006 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4007 }
4008 
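// A worked example of the check above (illustrative): for a node with mode
// 0754 whose owner calls access(path, R_OK | X_OK), permissions resolves to
// 07 and (accessMode & ~permissions) == (05 & ~07) == 0, so B_OK is
// returned. An unrelated caller gets permissions 04 and is denied X_OK.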
4009 
4010 #if 0
4011 extern "C" status_t
4012 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4013 	size_t* _numBytes)
4014 {
4015 	struct file_descriptor* descriptor;
4016 	struct vnode* vnode;
4017 
4018 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4019 	if (descriptor == NULL)
4020 		return B_FILE_ERROR;
4021 
4022 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4023 		count, 0, _numBytes);
4024 
4025 	put_fd(descriptor);
4026 	return status;
4027 }
4028 
4029 
4030 extern "C" status_t
4031 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4032 	size_t* _numBytes)
4033 {
4034 	struct file_descriptor* descriptor;
4035 	struct vnode* vnode;
4036 
4037 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4038 	if (descriptor == NULL)
4039 		return B_FILE_ERROR;
4040 
4041 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4042 		count, 0, _numBytes);
4043 
4044 	put_fd(descriptor);
4045 	return status;
4046 }
4047 #endif
4048 
4049 
4050 extern "C" status_t
4051 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4052 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4053 	size_t* _bytes)
4054 {
4055 	struct vnode* vnode;
4056 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4057 	if (!descriptor.IsSet())
4058 		return B_FILE_ERROR;
4059 
4060 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4061 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4062 		false);
4063 
4064 	return status;
4065 }
4066 
4067 
4068 extern "C" status_t
4069 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4070 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4071 	size_t* _bytes)
4072 {
4073 	struct vnode* vnode;
4074 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4075 	if (!descriptor.IsSet())
4076 		return B_FILE_ERROR;
4077 
4078 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4079 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4080 		true);
4081 
4082 	return status;
4083 }
4084 
4085 
4086 extern "C" status_t
4087 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4088 {
4089 	// lookup mount -- the caller is required to make sure that the mount
4090 	// won't go away
4091 	ReadLocker locker(sMountLock);
4092 	struct fs_mount* mount = find_mount(mountID);
4093 	if (mount == NULL)
4094 		return B_BAD_VALUE;
4095 	locker.Unlock();
4096 
4097 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4098 }
4099 
4100 
4101 extern "C" status_t
4102 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4103 {
4104 	// lookup mount -- the caller is required to make sure that the mount
4105 	// won't go away
4106 	ReadLocker locker(sMountLock);
4107 	struct fs_mount* mount = find_mount(mountID);
4108 	if (mount == NULL)
4109 		return B_BAD_VALUE;
4110 	locker.Unlock();
4111 
4112 	return mount->entry_cache.Add(dirID, name, -1, true);
4113 }
4114 
4115 
4116 extern "C" status_t
4117 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4118 {
4119 	// lookup mount -- the caller is required to make sure that the mount
4120 	// won't go away
4121 	ReadLocker locker(sMountLock);
4122 	struct fs_mount* mount = find_mount(mountID);
4123 	if (mount == NULL)
4124 		return B_BAD_VALUE;
4125 	locker.Unlock();
4126 
4127 	return mount->entry_cache.Remove(dirID, name);
4128 }
4129 
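// Usage sketch (illustrative -- the names are made up): a file system would
// call these from its lookup/create/unlink hooks to keep the entry cache
// coherent, e.g.:
//
//	entry_cache_add(volume->id, dirID, "data.txt", nodeID);    // positive entry
//	entry_cache_add_missing(volume->id, dirID, "gone.txt");    // negative entry
//	entry_cache_remove(volume->id, dirID, "data.txt");         // after unlink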
4130 
4131 //	#pragma mark - private VFS API
4132 //	Functions the VFS exports for other parts of the kernel
4133 
4134 
4135 /*! Acquires another reference to the vnode that has to be released
4136 	by calling vfs_put_vnode().
4137 */
4138 void
4139 vfs_acquire_vnode(struct vnode* vnode)
4140 {
4141 	inc_vnode_ref_count(vnode);
4142 }
4143 
4144 
4145 /*! This is currently called from file_cache_create() only.
4146 	It's probably a temporary solution as long as devfs requires that
4147 	fs_read_pages()/fs_write_pages() are called with the standard
4148 	open cookie and not with a device cookie.
4149 	If that's done differently, remove this call; it has no other
4150 	purpose.
4151 */
4152 extern "C" status_t
4153 vfs_get_cookie_from_fd(int fd, void** _cookie)
4154 {
4155 	struct file_descriptor* descriptor;
4156 
4157 	descriptor = get_fd(get_current_io_context(true), fd);
4158 	if (descriptor == NULL)
4159 		return B_FILE_ERROR;
4160 
4161 	*_cookie = descriptor->cookie;
4162 	return B_OK;
4163 }
4164 
4165 
4166 extern "C" status_t
4167 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4168 {
4169 	*vnode = get_vnode_from_fd(fd, kernel);
4170 
4171 	if (*vnode == NULL)
4172 		return B_FILE_ERROR;
4173 
4174 	return B_OK;
4175 }
4176 
4177 
4178 extern "C" status_t
4179 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4180 {
4181 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4182 		path, kernel));
4183 
4184 	KPath pathBuffer;
4185 	if (pathBuffer.InitCheck() != B_OK)
4186 		return B_NO_MEMORY;
4187 
4188 	char* buffer = pathBuffer.LockBuffer();
4189 	strlcpy(buffer, path, pathBuffer.BufferSize());
4190 
4191 	VnodePutter vnode;
4192 	status_t status = path_to_vnode(buffer, true, vnode, NULL, kernel);
4193 	if (status != B_OK)
4194 		return status;
4195 
4196 	*_vnode = vnode.Detach();
4197 	return B_OK;
4198 }
4199 
4200 
4201 extern "C" status_t
4202 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4203 {
4204 	struct vnode* vnode = NULL;
4205 
4206 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4207 	if (status != B_OK)
4208 		return status;
4209 
4210 	*_vnode = vnode;
4211 	return B_OK;
4212 }
4213 
4214 
4215 extern "C" status_t
4216 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4217 	const char* name, struct vnode** _vnode)
4218 {
4219 	VnodePutter vnode;
4220 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, false, true, vnode);
4221 	*_vnode = vnode.Detach();
4222 	return status;
4223 }
4224 
4225 
4226 extern "C" void
4227 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4228 {
4229 	*_mountID = vnode->device;
4230 	*_vnodeID = vnode->id;
4231 }
4232 
4233 
4234 /*!
4235 	Helper function abstracting the process of "converting" a given
4236 	vnode-pointer to a fs_vnode-pointer.
4237 	Currently only used in bindfs.
4238 */
4239 extern "C" fs_vnode*
4240 vfs_fsnode_for_vnode(struct vnode* vnode)
4241 {
4242 	return vnode;
4243 }
4244 
4245 
4246 /*!
4247 	Calls fs_open() on the given vnode and returns a new
4248 	file descriptor for it
4249 */
4250 int
4251 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4252 {
4253 	return open_vnode(vnode, openMode, kernel);
4254 }
4255 
4256 
4257 /*!	Looks up a vnode with the given mount and vnode ID.
4258 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4259 	to the node.
4260 	It's currently only used by file_cache_create().
4261 */
4262 extern "C" status_t
4263 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4264 {
4265 	rw_lock_read_lock(&sVnodeLock);
4266 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4267 	rw_lock_read_unlock(&sVnodeLock);
4268 
4269 	if (vnode == NULL)
4270 		return B_ERROR;
4271 
4272 	*_vnode = vnode;
4273 	return B_OK;
4274 }
4275 
4276 
4277 extern "C" status_t
4278 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4279 	bool traverseLeafLink, bool kernel, void** _node)
4280 {
4281 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4282 		volume, path, kernel));
4283 
4284 	KPath pathBuffer;
4285 	if (pathBuffer.InitCheck() != B_OK)
4286 		return B_NO_MEMORY;
4287 
4288 	fs_mount* mount;
4289 	status_t status = get_mount(volume->id, &mount);
4290 	if (status != B_OK)
4291 		return status;
4292 
4293 	char* buffer = pathBuffer.LockBuffer();
4294 	strlcpy(buffer, path, pathBuffer.BufferSize());
4295 
4296 	VnodePutter vnode;
4297 
4298 	if (buffer[0] == '/')
4299 		status = path_to_vnode(buffer, traverseLeafLink, vnode, NULL, kernel);
4300 	else {
4301 		inc_vnode_ref_count(mount->root_vnode);
4302 			// vnode_path_to_vnode() releases a reference to the starting vnode
4303 		status = vnode_path_to_vnode(mount->root_vnode, buffer, traverseLeafLink,
4304 			kernel, vnode, NULL);
4305 	}
4306 
4307 	put_mount(mount);
4308 
4309 	if (status != B_OK)
4310 		return status;
4311 
4312 	if (vnode->device != volume->id) {
4313 		// wrong mount ID -- must not gain access to foreign file system nodes
4314 		return B_BAD_VALUE;
4315 	}
4316 
4317 	// Use get_vnode() to resolve the cookie for the right layer.
4318 	status = get_vnode(volume, vnode->id, _node);
4319 
4320 	return status;
4321 }
4322 
4323 
4324 status_t
4325 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4326 	struct stat* stat, bool kernel)
4327 {
4328 	status_t status;
4329 
4330 	if (path != NULL) {
4331 		// path given: get the stat of the node referred to by (fd, path)
4332 		KPath pathBuffer(path);
4333 		if (pathBuffer.InitCheck() != B_OK)
4334 			return B_NO_MEMORY;
4335 
4336 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4337 			traverseLeafLink, stat, kernel);
4338 	} else {
4339 		// no path given: get the FD and use the FD operation
4340 		FileDescriptorPutter descriptor
4341 			(get_fd(get_current_io_context(kernel), fd));
4342 		if (!descriptor.IsSet())
4343 			return B_FILE_ERROR;
4344 
4345 		if (descriptor->ops->fd_read_stat)
4346 			status = descriptor->ops->fd_read_stat(descriptor.Get(), stat);
4347 		else
4348 			status = B_UNSUPPORTED;
4349 	}
4350 
4351 	return status;
4352 }
4353 
4354 
4355 /*!	Finds the full path to the file that contains the module \a moduleName,
4356 	puts it into \a pathBuffer, and returns B_OK for success.
4357 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4358 	and \c B_ENTRY_NOT_FOUND if no file could be found.
4359 	\a pathBuffer is clobbered in any case and must not be relied on if this
4360 	function returns unsuccessfully.
4361 	\a basePath and \a pathBuffer must not point to the same space.
4362 */
4363 status_t
4364 vfs_get_module_path(const char* basePath, const char* moduleName,
4365 	char* pathBuffer, size_t bufferSize)
4366 {
4367 	status_t status;
4368 	size_t length;
4369 	char* path;
4370 
4371 	if (bufferSize == 0
4372 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4373 		return B_BUFFER_OVERFLOW;
4374 
4375 	VnodePutter dir;
4376 	status = path_to_vnode(pathBuffer, true, dir, NULL, true);
4377 	if (status != B_OK)
4378 		return status;
4379 
4380 	// the path buffer had been clobbered by the above call
4381 	length = strlcpy(pathBuffer, basePath, bufferSize);
4382 	if (pathBuffer[length - 1] != '/')
4383 		pathBuffer[length++] = '/';
4384 
4385 	path = pathBuffer + length;
4386 	bufferSize -= length;
4387 
4388 	VnodePutter file;
4389 	while (moduleName) {
4390 		char* nextPath = strchr(moduleName, '/');
4391 		if (nextPath == NULL)
4392 			length = strlen(moduleName);
4393 		else {
4394 			length = nextPath - moduleName;
4395 			nextPath++;
4396 		}
4397 
4398 		if (length + 1 >= bufferSize)
4399 			return B_BUFFER_OVERFLOW;
4400 
4401 		memcpy(path, moduleName, length);
4402 		path[length] = '\0';
4403 		moduleName = nextPath;
4404 
4405 		// vnode_path_to_vnode() assumes ownership of the passed dir
4406 		status = vnode_path_to_vnode(dir.Detach(), path, true, true, file, NULL);
4407 		if (status != B_OK)
4408 			return status;
4409 
4410 		if (S_ISDIR(file->Type())) {
4411 			// go to the next directory
4412 			path[length] = '/';
4413 			path[length + 1] = '\0';
4414 			path += length + 1;
4415 			bufferSize -= length + 1;
4416 
4417 			dir.SetTo(file.Detach());
4418 		} else if (S_ISREG(file->Type())) {
4419 			// it's a file so it should be what we've searched for
4420 			return B_OK;
4421 		} else {
4422 			TRACE(("vfs_get_module_path(): something is strange here: "
4423 				"0x%08" B_PRIx32 "...\n", file->Type()));
4424 			return B_ERROR;
4425 		}
4426 	}
4427 
4428 	// if we got here, the moduleName just pointed to a directory, not to
4429 	// a real module - what should we do in this case?
4430 	return B_ENTRY_NOT_FOUND;
4431 }
4432 
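// Example (illustrative paths): with basePath "/boot/system/add-ons/kernel"
// and moduleName "bus_managers/pci/v1", the loop above descends
// "bus_managers" as a directory; if "pci" turns out to be a regular file,
// that file is assumed to contain the module and B_OK is returned with the
// full path in pathBuffer. Otherwise the descent continues with "v1".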
4433 
4434 /*!	\brief Normalizes a given path.
4435 
4436 	The path must refer to an existing or non-existing entry in an existing
4437 	directory; that is, after chopping off the leaf component, the remaining
4438 	path must refer to an existing directory.
4439 
4440 	The returned path will be canonical in that it will be absolute, will not
4441 	contain any "." or ".." components or duplicate occurrences of '/'s,
4442 	and none of the directory components will be symbolic links.
4443 
4444 	Any two paths referring to the same entry will result in the same
4445 	normalized path (well, that is pretty much the definition of `normalized',
4446 	isn't it :-).
4447 
4448 	\param path The path to be normalized.
4449 	\param buffer The buffer into which the normalized path will be written.
4450 		   May be the same one as \a path.
4451 	\param bufferSize The size of \a buffer.
4452 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4453 	\param kernel \c true, if the IO context of the kernel shall be used,
4454 		   otherwise that of the team this thread belongs to. Only relevant,
4455 		   if the path is relative (to get the CWD).
4456 	\return \c B_OK if everything went fine, another error code otherwise.
4457 */
4458 status_t
4459 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4460 	bool traverseLink, bool kernel)
4461 {
4462 	if (!path || !buffer || bufferSize < 1)
4463 		return B_BAD_VALUE;
4464 
4465 	if (path != buffer) {
4466 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4467 			return B_BUFFER_OVERFLOW;
4468 	}
4469 
4470 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4471 }
4472 
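// Example (illustrative, assuming no symlinks are involved): normalizing
// "/boot/home/../home/./Desktop" yields "/boot/home/Desktop"; a relative
// path such as "Desktop" is first resolved against the caller's CWD.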
4473 
4474 /*!	\brief Gets the parent of the passed in node.
4475 
4476 	Gets the parent of the passed in node, and correctly resolves covered
4477 	nodes.
4478 */
4479 extern "C" status_t
4480 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4481 {
4482 	return resolve_covered_parent(parent, device, node,
4483 		get_current_io_context(true));
4484 }
4485 
4486 
4487 /*!	\brief Creates a special node in the file system.
4488 
4489 	The caller gets a reference to the newly created node (which is passed
4490 	back through \a _createdVnode) and is responsible for releasing it.
4491 
4492 	\param path The path where to create the entry for the node. Can be \c NULL,
4493 		in which case the node is created without an entry in the root FS -- it
4494 		will automatically be deleted when the last reference has been released.
4495 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4496 		the target file system will just create the node with its standard
4497 		operations. Depending on the type of the node a subnode might be created
4498 		automatically, though.
4499 	\param mode The type and permissions for the node to be created.
4500 	\param flags Flags to be passed to the creating FS.
4501 	\param kernel \c true, if called in the kernel context (relevant only if
4502 		\a path is not \c NULL and not absolute).
4503 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4504 		file system creating the node, with the private data pointer and
4505 		operations for the super node. Can be \c NULL.
4506 	\param _createdVnode Pointer to pre-allocated storage in which to store the
4507 		pointer to the newly created node.
4508 	\return \c B_OK, if everything went fine, another error code otherwise.
4509 */
4510 status_t
4511 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4512 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4513 	struct vnode** _createdVnode)
4514 {
4515 	VnodePutter dirNode;
4516 	char _leaf[B_FILE_NAME_LENGTH];
4517 	char* leaf = NULL;
4518 
4519 	if (path) {
4520 		// We've got a path. Get the dir vnode and the leaf name.
4521 		KPath tmpPathBuffer;
4522 		if (tmpPathBuffer.InitCheck() != B_OK)
4523 			return B_NO_MEMORY;
4524 
4525 		char* tmpPath = tmpPathBuffer.LockBuffer();
4526 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4527 			return B_NAME_TOO_LONG;
4528 
4529 		// get the dir vnode and the leaf name
4530 		leaf = _leaf;
4531 		status_t error = path_to_dir_vnode(tmpPath, dirNode, leaf, kernel);
4532 		if (error != B_OK)
4533 			return error;
4534 	} else {
4535 		// No path. Create the node in the root FS.
4536 		dirNode.SetTo(sRoot);
4537 		inc_vnode_ref_count(dirNode.Get());
4538 	}
4539 
4540 	// check support for creating special nodes
4541 	if (!HAS_FS_CALL(dirNode, create_special_node))
4542 		return B_UNSUPPORTED;
4543 
4544 	// create the node
4545 	fs_vnode superVnode;
4546 	ino_t nodeID;
4547 	status_t status = FS_CALL(dirNode.Get(), create_special_node, leaf, subVnode,
4548 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4549 	if (status != B_OK)
4550 		return status;
4551 
4552 	// lookup the node
4553 	rw_lock_read_lock(&sVnodeLock);
4554 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4555 	rw_lock_read_unlock(&sVnodeLock);
4556 
4557 	if (*_createdVnode == NULL) {
4558 		panic("vfs_create_special_node(): lookup of node failed");
4559 		return B_ERROR;
4560 	}
4561 
4562 	return B_OK;
4563 }
4564 
4565 
4566 extern "C" void
4567 vfs_put_vnode(struct vnode* vnode)
4568 {
4569 	put_vnode(vnode);
4570 }
4571 
4572 
4573 extern "C" status_t
4574 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4575 {
4576 	// Get current working directory from io context
4577 	struct io_context* context = get_current_io_context(false);
4578 	status_t status = B_OK;
4579 
4580 	mutex_lock(&context->io_mutex);
4581 
4582 	if (context->cwd != NULL) {
4583 		*_mountID = context->cwd->device;
4584 		*_vnodeID = context->cwd->id;
4585 	} else
4586 		status = B_ERROR;
4587 
4588 	mutex_unlock(&context->io_mutex);
4589 	return status;
4590 }
4591 
4592 
4593 status_t
4594 vfs_unmount(dev_t mountID, uint32 flags)
4595 {
4596 	return fs_unmount(NULL, mountID, flags, true);
4597 }
4598 
4599 
4600 extern "C" status_t
4601 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4602 {
4603 	struct vnode* vnode;
4604 
4605 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4606 	if (status != B_OK)
4607 		return status;
4608 
4609 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4610 	put_vnode(vnode);
4611 	return B_OK;
4612 }
4613 
4614 
4615 extern "C" void
4616 vfs_free_unused_vnodes(int32 level)
4617 {
4618 	vnode_low_resource_handler(NULL,
4619 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4620 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4621 		level);
4622 }
4623 
4624 
4625 extern "C" bool
4626 vfs_can_page(struct vnode* vnode, void* cookie)
4627 {
4628 	FUNCTION(("vfs_can_page: vnode %p\n", vnode));
4629 
4630 	if (HAS_FS_CALL(vnode, can_page))
4631 		return FS_CALL(vnode, can_page, cookie);
4632 	return false;
4633 }
4634 
4635 
4636 extern "C" status_t
4637 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4638 	const generic_io_vec* vecs, size_t count, uint32 flags,
4639 	generic_size_t* _numBytes)
4640 {
4641 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4642 		vecs, pos));
4643 
4644 #if VFS_PAGES_IO_TRACING
4645 	generic_size_t bytesRequested = *_numBytes;
4646 #endif
4647 
4648 	IORequest request;
4649 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4650 	if (status == B_OK) {
4651 		status = vfs_vnode_io(vnode, cookie, &request);
4652 		if (status == B_OK)
4653 			status = request.Wait();
4654 		*_numBytes = request.TransferredBytes();
4655 	}
4656 
4657 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4658 		status, *_numBytes));
4659 
4660 	return status;
4661 }
4662 
4663 
4664 extern "C" status_t
4665 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4666 	const generic_io_vec* vecs, size_t count, uint32 flags,
4667 	generic_size_t* _numBytes)
4668 {
4669 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4670 		vecs, pos));
4671 
4672 #if VFS_PAGES_IO_TRACING
4673 	generic_size_t bytesRequested = *_numBytes;
4674 #endif
4675 
4676 	IORequest request;
4677 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4678 	if (status == B_OK) {
4679 		status = vfs_vnode_io(vnode, cookie, &request);
4680 		if (status == B_OK)
4681 			status = request.Wait();
4682 		*_numBytes = request.TransferredBytes();
4683 	}
4684 
4685 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4686 		status, *_numBytes));
4687 
4688 	return status;
4689 }
4690 
4691 
4692 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4693 	created if \a allocate is \c true.
4694 	In case it's successful, it will also grab a reference to the cache
4695 	it returns.
4696 */
4697 extern "C" status_t
4698 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4699 {
4700 	if (vnode->cache != NULL) {
4701 		vnode->cache->AcquireRef();
4702 		*_cache = vnode->cache;
4703 		return B_OK;
4704 	}
4705 
4706 	rw_lock_read_lock(&sVnodeLock);
4707 	vnode->Lock();
4708 
4709 	status_t status = B_OK;
4710 
4711 	// The cache could have been created in the meantime
4712 	if (vnode->cache == NULL) {
4713 		if (allocate) {
4714 			// TODO: actually the vnode needs to be busy already here, or
4715 			//	else this won't work...
4716 			bool wasBusy = vnode->IsBusy();
4717 			vnode->SetBusy(true);
4718 
4719 			vnode->Unlock();
4720 			rw_lock_read_unlock(&sVnodeLock);
4721 
4722 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4723 
4724 			rw_lock_read_lock(&sVnodeLock);
4725 			vnode->Lock();
4726 			vnode->SetBusy(wasBusy);
4727 		} else
4728 			status = B_BAD_VALUE;
4729 	}
4730 
4731 	vnode->Unlock();
4732 	rw_lock_read_unlock(&sVnodeLock);
4733 
4734 	if (status == B_OK) {
4735 		vnode->cache->AcquireRef();
4736 		*_cache = vnode->cache;
4737 	}
4738 
4739 	return status;
4740 }
4741 
4742 
4743 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4744 	their own.
4745 	In case it's successful, it will also grab a reference to the cache
4746 	it returns.
4747 */
4748 extern "C" status_t
4749 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4750 {
4751 	rw_lock_read_lock(&sVnodeLock);
4752 	vnode->Lock();
4753 
4754 	status_t status = B_OK;
4755 	if (vnode->cache != NULL) {
4756 		status = B_NOT_ALLOWED;
4757 	} else {
4758 		vnode->cache = _cache;
4759 		_cache->AcquireRef();
4760 	}
4761 
4762 	vnode->Unlock();
4763 	rw_lock_read_unlock(&sVnodeLock);
4764 	return status;
4765 }
4766 
4767 
4768 status_t
4769 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4770 	file_io_vec* vecs, size_t* _count)
4771 {
4772 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4773 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4774 
4775 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4776 }
4777 
4778 
4779 status_t
4780 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4781 {
4782 	status_t status = FS_CALL(vnode, read_stat, stat);
4783 
4784 	// fill in the st_dev and st_ino fields
4785 	if (status == B_OK) {
4786 		stat->st_dev = vnode->device;
4787 		stat->st_ino = vnode->id;
4788 		// the rdev field must stay unset for non-special files
4789 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4790 			stat->st_rdev = -1;
4791 	}
4792 
4793 	return status;
4794 }
4795 
4796 
4797 status_t
4798 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4799 {
4800 	struct vnode* vnode;
4801 	status_t status = get_vnode(device, inode, &vnode, true, false);
4802 	if (status != B_OK)
4803 		return status;
4804 
4805 	status = vfs_stat_vnode(vnode, stat);
4806 
4807 	put_vnode(vnode);
4808 	return status;
4809 }
4810 
4811 
4812 status_t
4813 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4814 {
4815 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4816 }
4817 
4818 
4819 status_t
4820 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4821 	bool kernel, char* path, size_t pathLength)
4822 {
4823 	VnodePutter vnode;
4824 	status_t status;
4825 
4826 	// filter invalid leaf names
4827 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4828 		return B_BAD_VALUE;
4829 
4830 	// get the vnode matching the dir's node_ref
4831 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4832 		// special cases "." and "..": we can directly get the vnode of the
4833 		// referenced directory
4834 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, vnode);
4835 		leaf = NULL;
4836 	} else {
4837 		struct vnode* temp = NULL;
4838 		status = get_vnode(device, inode, &temp, true, false);
4839 		vnode.SetTo(temp);
4840 	}
4841 	if (status != B_OK)
4842 		return status;
4843 
4844 	// get the directory path
4845 	status = dir_vnode_to_path(vnode.Get(), path, pathLength, kernel);
4846 	vnode.Unset();
4847 		// we don't need the vnode anymore
4848 	if (status != B_OK)
4849 		return status;
4850 
4851 	// append the leaf name
4852 	if (leaf) {
4853 		// insert a directory separator if this is not the file system root
4854 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4855 				>= pathLength)
4856 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4857 			return B_NAME_TOO_LONG;
4858 		}
4859 	}
4860 
4861 	return B_OK;
4862 }
4863 
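// Example (illustrative): for an entry ref referring to "notes.txt" in a
// directory that resolves to "/boot/home", the resulting path is
// "/boot/home/notes.txt". The special leaves "." and ".." are resolved to
// the directory they refer to before the path is built, as handled above.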
4864 
4865 /*!	If the given descriptor locked its vnode, that lock will be released. */
4866 void
4867 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4868 {
4869 	struct vnode* vnode = fd_vnode(descriptor);
4870 
4871 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4872 		vnode->mandatory_locked_by = NULL;
4873 }
4874 
4875 
4876 /*!	Releases any POSIX locks on the file descriptor. */
4877 status_t
4878 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4879 {
4880 	struct vnode* vnode = descriptor->u.vnode;
4881 	if (vnode == NULL)
4882 		return B_OK;
4883 
4884 	if (HAS_FS_CALL(vnode, release_lock))
4885 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4886 
4887 	return release_advisory_lock(vnode, context, NULL, NULL);
4888 }
4889 
4890 
4891 /*!	Closes all file descriptors of the specified I/O context that
4892 	have the O_CLOEXEC flag set.
4893 */
4894 void
4895 vfs_exec_io_context(io_context* context)
4896 {
4897 	uint32 i;
4898 
4899 	for (i = 0; i < context->table_size; i++) {
4900 		mutex_lock(&context->io_mutex);
4901 
4902 		struct file_descriptor* descriptor = context->fds[i];
4903 		bool remove = false;
4904 
4905 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4906 			context->fds[i] = NULL;
4907 			context->num_used_fds--;
4908 
4909 			remove = true;
4910 		}
4911 
4912 		mutex_unlock(&context->io_mutex);
4913 
4914 		if (remove) {
4915 			close_fd(context, descriptor);
4916 			put_fd(descriptor);
4917 		}
4918 	}
4919 }
4920 
4921 
4922 /*! Sets up a new io_context structure, and inherits the properties
4923 	of the parent io_context if one is given.
4924 */
4925 io_context*
4926 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4927 {
4928 	io_context* context = (io_context*)malloc(sizeof(io_context));
4929 	if (context == NULL)
4930 		return NULL;
4931 
4932 	TIOC(NewIOContext(context, parentContext));
4933 
4934 	memset(context, 0, sizeof(io_context));
4935 	context->ref_count = 1;
4936 
4937 	MutexLocker parentLocker;
4938 
4939 	size_t tableSize;
4940 	if (parentContext != NULL) {
4941 		parentLocker.SetTo(parentContext->io_mutex, false);
4942 		tableSize = parentContext->table_size;
4943 	} else
4944 		tableSize = DEFAULT_FD_TABLE_SIZE;
4945 
4946 	// allocate space for FDs and their close-on-exec flag
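	// The three tables share a single allocation, laid out as
	//	[file_descriptor* x tableSize][select_info* x tableSize][bitmap],
	// where the bitmap provides one close-on-exec bit per FD slot, rounded
	// up to whole bytes.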
4947 	context->fds = (file_descriptor**)malloc(
4948 		sizeof(struct file_descriptor*) * tableSize
4949 		+ sizeof(struct select_info*) * tableSize
4950 		+ (tableSize + 7) / 8);
4951 	if (context->fds == NULL) {
4952 		free(context);
4953 		return NULL;
4954 	}
4955 
4956 	context->select_infos = (select_info**)(context->fds + tableSize);
4957 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4958 
4959 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4960 		+ sizeof(struct select_info*) * tableSize
4961 		+ (tableSize + 7) / 8);
4962 
4963 	mutex_init(&context->io_mutex, "I/O context");
4964 
4965 	// Copy all parent file descriptors
4966 
4967 	if (parentContext != NULL) {
4968 		size_t i;
4969 
4970 		mutex_lock(&sIOContextRootLock);
4971 		context->root = parentContext->root;
4972 		if (context->root)
4973 			inc_vnode_ref_count(context->root);
4974 		mutex_unlock(&sIOContextRootLock);
4975 
4976 		context->cwd = parentContext->cwd;
4977 		if (context->cwd)
4978 			inc_vnode_ref_count(context->cwd);
4979 
4980 		if (parentContext->inherit_fds) {
4981 			for (i = 0; i < tableSize; i++) {
4982 				struct file_descriptor* descriptor = parentContext->fds[i];
4983 
4984 				if (descriptor != NULL
4985 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4986 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4987 					if (closeOnExec && purgeCloseOnExec)
4988 						continue;
4989 
4990 					TFD(InheritFD(context, i, descriptor, parentContext));
4991 
4992 					context->fds[i] = descriptor;
4993 					context->num_used_fds++;
4994 					atomic_add(&descriptor->ref_count, 1);
4995 					atomic_add(&descriptor->open_count, 1);
4996 
4997 					if (closeOnExec)
4998 						fd_set_close_on_exec(context, i, true);
4999 				}
5000 			}
5001 		}
5002 
5003 		parentLocker.Unlock();
5004 	} else {
5005 		context->root = sRoot;
5006 		context->cwd = sRoot;
5007 
5008 		if (context->root)
5009 			inc_vnode_ref_count(context->root);
5010 
5011 		if (context->cwd)
5012 			inc_vnode_ref_count(context->cwd);
5013 	}
5014 
5015 	context->table_size = tableSize;
5016 	context->inherit_fds = parentContext != NULL;
5017 
5018 	list_init(&context->node_monitors);
5019 	context->max_monitors = DEFAULT_NODE_MONITORS;
5020 
5021 	return context;
5022 }
5023 
5024 
5025 void
5026 vfs_get_io_context(io_context* context)
5027 {
5028 	atomic_add(&context->ref_count, 1);
5029 }
5030 
5031 
5032 void
5033 vfs_put_io_context(io_context* context)
5034 {
5035 	if (atomic_add(&context->ref_count, -1) == 1)
5036 		free_io_context(context);
5037 }
5038 
5039 
5040 status_t
5041 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5042 {
5043 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5044 		return B_BAD_VALUE;
5045 
5046 	TIOC(ResizeIOContext(context, newSize));
5047 
5048 	MutexLocker _(context->io_mutex);
5049 
5050 	uint32 oldSize = context->table_size;
5051 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5052 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
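	// (size + 7) / 8 rounds up to the number of whole bytes needed to hold
	// one close-on-exec bit per FD slot (e.g. 100 FDs -> 13 bytes).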
5053 
5054 	// If the tables shrink, make sure none of the fds being dropped are in use.
5055 	if (newSize < oldSize) {
5056 		for (uint32 i = oldSize; i-- > newSize;) {
5057 			if (context->fds[i])
5058 				return B_BUSY;
5059 		}
5060 	}
5061 
5062 	// store pointers to the old tables
5063 	file_descriptor** oldFDs = context->fds;
5064 	select_info** oldSelectInfos = context->select_infos;
5065 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5066 
5067 	// allocate new tables
5068 	file_descriptor** newFDs = (file_descriptor**)malloc(
5069 		sizeof(struct file_descriptor*) * newSize
5070 		+ sizeof(struct select_info*) * newSize
5071 		+ newCloseOnExitBitmapSize);
5072 	if (newFDs == NULL)
5073 		return B_NO_MEMORY;
5074 
5075 	context->fds = newFDs;
5076 	context->select_infos = (select_info**)(context->fds + newSize);
5077 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5078 	context->table_size = newSize;
5079 
5080 	// copy entries from old tables
5081 	uint32 toCopy = min_c(oldSize, newSize);
5082 
5083 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5084 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5085 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5086 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5087 
5088 	// clear additional entries, if the tables grow
5089 	if (newSize > oldSize) {
5090 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5091 		memset(context->select_infos + oldSize, 0,
5092 			sizeof(void*) * (newSize - oldSize));
5093 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5094 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5095 	}
5096 
5097 	free(oldFDs);
5098 
5099 	return B_OK;
5100 }
5101 
5102 
5103 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5104 
5105 	Given an arbitrary vnode (identified by mount and node ID), the function
5106 	checks, whether the vnode is covered by another vnode. If it is, the
5107 	function returns the mount and node ID of the covering vnode. Otherwise
5108 	it simply returns the supplied mount and node ID.
5109 
5110 	In case of error (e.g. the supplied node could not be found) the variables
5111 	for storing the resolved mount and node ID remain untouched and an error
5112 	code is returned.
5113 
5114 	\param mountID The mount ID of the vnode in question.
5115 	\param nodeID The node ID of the vnode in question.
5116 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5117 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5118 	\return
5119 	- \c B_OK, if everything went fine,
5120 	- another error code, if something went wrong.
5121 */
5122 status_t
5123 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5124 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5125 {
5126 	// get the node
5127 	struct vnode* node;
5128 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5129 	if (error != B_OK)
5130 		return error;
5131 
5132 	// resolve the node
5133 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5134 		put_vnode(node);
5135 		node = coveringNode;
5136 	}
5137 
5138 	// set the return values
5139 	*resolvedMountID = node->device;
5140 	*resolvedNodeID = node->id;
5141 
5142 	put_vnode(node);
5143 
5144 	return B_OK;
5145 }
5146 
5147 
5148 status_t
5149 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5150 	ino_t* _mountPointNodeID)
5151 {
5152 	ReadLocker nodeLocker(sVnodeLock);
5153 	ReadLocker mountLocker(sMountLock);
5154 
5155 	struct fs_mount* mount = find_mount(mountID);
5156 	if (mount == NULL)
5157 		return B_BAD_VALUE;
5158 
5159 	Vnode* mountPoint = mount->covers_vnode;
5160 
5161 	*_mountPointMountID = mountPoint->device;
5162 	*_mountPointNodeID = mountPoint->id;
5163 
5164 	return B_OK;
5165 }
5166 
5167 
5168 status_t
5169 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5170 	ino_t coveredNodeID)
5171 {
5172 	// get the vnodes
5173 	Vnode* vnode;
5174 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5175 	if (error != B_OK)
5176 		return B_BAD_VALUE;
5177 	VnodePutter vnodePutter(vnode);
5178 
5179 	Vnode* coveredVnode;
5180 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5181 		false);
5182 	if (error != B_OK)
5183 		return B_BAD_VALUE;
5184 	VnodePutter coveredVnodePutter(coveredVnode);
5185 
5186 	// establish the covered/covering links
5187 	WriteLocker locker(sVnodeLock);
5188 
5189 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5190 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5191 		return B_BUSY;
5192 	}
5193 
5194 	vnode->covers = coveredVnode;
5195 	vnode->SetCovering(true);
5196 
5197 	coveredVnode->covered_by = vnode;
5198 	coveredVnode->SetCovered(true);
5199 
5200 	// the vnodes do now reference each other
5201 	inc_vnode_ref_count(vnode);
5202 	inc_vnode_ref_count(coveredVnode);
5203 
5204 	return B_OK;
5205 }
5206 
5207 
5208 int
5209 vfs_getrlimit(int resource, struct rlimit* rlp)
5210 {
5211 	if (!rlp)
5212 		return B_BAD_ADDRESS;
5213 
5214 	switch (resource) {
5215 		case RLIMIT_NOFILE:
5216 		{
5217 			struct io_context* context = get_current_io_context(false);
5218 			MutexLocker _(context->io_mutex);
5219 
5220 			rlp->rlim_cur = context->table_size;
5221 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5222 			return 0;
5223 		}
5224 
5225 		case RLIMIT_NOVMON:
5226 		{
5227 			struct io_context* context = get_current_io_context(false);
5228 			MutexLocker _(context->io_mutex);
5229 
5230 			rlp->rlim_cur = context->max_monitors;
5231 			rlp->rlim_max = MAX_NODE_MONITORS;
5232 			return 0;
5233 		}
5234 
5235 		default:
5236 			return B_BAD_VALUE;
5237 	}
5238 }
5239 
5240 
5241 int
5242 vfs_setrlimit(int resource, const struct rlimit* rlp)
5243 {
5244 	if (!rlp)
5245 		return B_BAD_ADDRESS;
5246 
5247 	switch (resource) {
5248 		case RLIMIT_NOFILE:
5249 			/* TODO: check getuid() */
5250 			if (rlp->rlim_max != RLIM_SAVED_MAX
5251 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5252 				return B_NOT_ALLOWED;
5253 
5254 			return vfs_resize_fd_table(get_current_io_context(false),
5255 				rlp->rlim_cur);
5256 
5257 		case RLIMIT_NOVMON:
5258 			/* TODO: check getuid() */
5259 			if (rlp->rlim_max != RLIM_SAVED_MAX
5260 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5261 				return B_NOT_ALLOWED;
5262 
5263 			return resize_monitor_table(get_current_io_context(false),
5264 				rlp->rlim_cur);
5265 
5266 		default:
5267 			return B_BAD_VALUE;
5268 	}
5269 }
5270 
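// Usage sketch (illustrative): a userland call such as
//
//	struct rlimit rl = { 1024, RLIM_SAVED_MAX };
//	setrlimit(RLIMIT_NOFILE, &rl);
//
// ends up in vfs_setrlimit() above and, if permitted, resizes the team's FD
// table via vfs_resize_fd_table().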
5271 
5272 status_t
5273 vfs_init(kernel_args* args)
5274 {
5275 	vnode::StaticInit();
5276 
5277 	sVnodeTable = new(std::nothrow) VnodeTable();
5278 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5279 		panic("vfs_init: error creating vnode hash table\n");
5280 
5281 	struct vnode dummy_vnode;
5282 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5283 
5284 	struct fs_mount dummyMount;
5285 	sMountsTable = new(std::nothrow) MountTable();
5286 	if (sMountsTable == NULL
5287 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5288 		panic("vfs_init: error creating mounts hash table\n");
5289 
5290 	sPathNameCache = create_object_cache("vfs path names",
5291 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5292 	if (sPathNameCache == NULL)
5293 		panic("vfs_init: error creating path name object_cache\n");
5294 
5295 	sVnodeCache = create_object_cache("vfs vnodes",
5296 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5297 	if (sVnodeCache == NULL)
5298 		panic("vfs_init: error creating vnode object_cache\n");
5299 
5300 	sFileDescriptorCache = create_object_cache("vfs fds",
5301 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5302 	if (sFileDescriptorCache == NULL)
5303 		panic("vfs_init: error creating file descriptor object_cache\n");
5304 
5305 	node_monitor_init();
5306 
5307 	sRoot = NULL;
5308 
5309 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5310 
5311 	if (block_cache_init() != B_OK)
5312 		return B_ERROR;
5313 
5314 #ifdef ADD_DEBUGGER_COMMANDS
5315 	// add some debugger commands
5316 	add_debugger_command_etc("vnode", &dump_vnode,
5317 		"Print info about the specified vnode",
5318 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5319 		"Prints information about the vnode specified by address <vnode> or\n"
5320 		"<devID>, <nodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5321 		"constructed and printed. It might not be possible to construct a\n"
5322 		"complete path, though.\n",
5323 		0);
5324 	add_debugger_command("vnodes", &dump_vnodes,
5325 		"list all vnodes (from the specified device)");
5326 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5327 		"list all vnode caches");
5328 	add_debugger_command("mount", &dump_mount,
5329 		"info about the specified fs_mount");
5330 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5331 	add_debugger_command("io_context", &dump_io_context,
5332 		"info about the I/O context");
5333 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5334 		"info about vnode usage");
5335 #endif
5336 
5337 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5338 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5339 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5340 		0);
5341 
5342 	fifo_init();
5343 	file_map_init();
5344 
5345 	return file_cache_init();
5346 }
5347 
5348 
5349 //	#pragma mark - fd_ops implementations
5350 
5351 
5352 /*!
5353 	Calls fs_open() on the given vnode and returns a new
5354 	file descriptor for it
5355 */
5356 static int
5357 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5358 {
5359 	void* cookie;
5360 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5361 	if (status != B_OK)
5362 		return status;
5363 
5364 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5365 	if (fd < 0) {
5366 		FS_CALL(vnode, close, cookie);
5367 		FS_CALL(vnode, free_cookie, cookie);
5368 	}
5369 	return fd;
5370 }
5371 
5372 
5373 /*!
5374 	Opens or creates the entry with the given name in the given directory,
5375 	and returns a new file descriptor for it
5376 */
5377 static int
5378 create_vnode(struct vnode* directory, const char* name, int openMode,
5379 	int perms, bool kernel)
5380 {
5381 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5382 	status_t status = B_ERROR;
5383 	VnodePutter vnode, dirPutter;
5384 	void* cookie;
5385 	ino_t newID;
5386 	char clonedName[B_FILE_NAME_LENGTH + 1];
5387 
5388 	// This is somewhat tricky: If the entry already exists, the FS responsible
5389 	// for the directory might not necessarily also be the one responsible for
5390 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5391 	// we can actually never call the create() hook without O_EXCL. Instead we
5392 	// try to look the entry up first. If it already exists, we just open the
5393 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5394 	// introduces a race condition, since someone else might have created the
5395 	// entry in the meantime. We hope the respective FS returns the correct
5396 	// error code, in which case we retry (up to 3 times).
5397 
5398 	for (int i = 0; i < 3 && status != B_OK; i++) {
5399 		bool create = false;
5400 
5401 		// look the node up
5402 		{
5403 			struct vnode* entry = NULL;
5404 			status = lookup_dir_entry(directory, name, &entry);
5405 			vnode.SetTo(entry);
5406 		}
5407 		if (status == B_OK) {
5408 			if ((openMode & O_EXCL) != 0)
5409 				return B_FILE_EXISTS;
5410 
5411 			// If the node is a symlink, we have to follow it, unless
5412 			// O_NOTRAVERSE is set.
5413 			if (S_ISLNK(vnode->Type()) && traverse) {
5414 				vnode.Unset();
5415 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5416 						>= B_FILE_NAME_LENGTH) {
5417 					return B_NAME_TOO_LONG;
5418 				}
5419 
5420 				inc_vnode_ref_count(directory);
5421 				dirPutter.Unset();
5422 				status = vnode_path_to_vnode(directory, clonedName, true,
5423 					kernel, vnode, NULL, clonedName);
5424 				if (status != B_OK) {
5425 					// The vnode wasn't found, but maybe its parent exists and
5426 					// we can create it there. In that case, vnode_path_to_vnode()
5427 					// has set vnode to the deepest directory found along the path.
5428 					if (status == B_ENTRY_NOT_FOUND) {
5429 						directory = vnode.Detach();
5430 						dirPutter.SetTo(directory);
5431 						name = clonedName;
5432 						create = true;
5433 					} else
5434 						return status;
5435 				}
5436 			}
5437 
5438 			if (!create) {
5439 				if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5440 					return B_LINK_LIMIT;
5441 
5442 				int fd = open_vnode(vnode.Get(), openMode & ~O_CREAT, kernel);
5443 				// on success keep the vnode reference for the FD
5444 				if (fd >= 0)
5445 					vnode.Detach();
5446 
5447 				return fd;
5448 			}
5449 		}
5450 
5451 		// it doesn't exist yet -- try to create it
5452 
5453 		if (!HAS_FS_CALL(directory, create))
5454 			return B_READ_ONLY_DEVICE;
5455 
5456 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5457 			&cookie, &newID);
5458 		if (status != B_OK
5459 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5460 			return status;
5461 		}
5462 	}
5463 
5464 	if (status != B_OK)
5465 		return status;
5466 
5467 	// the node has been created successfully
5468 
5469 	rw_lock_read_lock(&sVnodeLock);
5470 	vnode.SetTo(lookup_vnode(directory->device, newID));
5471 	rw_lock_read_unlock(&sVnodeLock);
5472 
5473 	if (!vnode.IsSet()) {
5474 		panic("vfs: fs_create() returned success but there is no vnode, "
5475 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5476 		return B_BAD_VALUE;
5477 	}
5478 
5479 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode.Get(), cookie, openMode, kernel);
5480 	if (fd >= 0) {
5481 		vnode.Detach();
5482 		return fd;
5483 	}
5484 
5485 	status = fd;
5486 
5487 	// something went wrong, clean up
5488 
5489 	FS_CALL(vnode.Get(), close, cookie);
5490 	FS_CALL(vnode.Get(), free_cookie, cookie);
5491 
5492 	FS_CALL(directory, unlink, name);
5493 
5494 	return status;
5495 }
5496 
5497 
5498 /*! Calls fs open_dir() on the given vnode and returns a new
5499 	file descriptor for it
5500 */
5501 static int
5502 open_dir_vnode(struct vnode* vnode, bool kernel)
5503 {
5504 	if (!HAS_FS_CALL(vnode, open_dir))
5505 		return B_UNSUPPORTED;
5506 
5507 	void* cookie;
5508 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5509 	if (status != B_OK)
5510 		return status;
5511 
5512 	// directory is opened, create a fd
5513 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5514 	if (status >= 0)
5515 		return status;
5516 
5517 	FS_CALL(vnode, close_dir, cookie);
5518 	FS_CALL(vnode, free_dir_cookie, cookie);
5519 
5520 	return status;
5521 }
5522 
5523 
5524 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5525 	file descriptor for it.
5526 	Used by attr_dir_open(), and attr_dir_open_fd().
5527 */
5528 static int
5529 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5530 {
5531 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5532 		return B_UNSUPPORTED;
5533 
5534 	void* cookie;
5535 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5536 	if (status != B_OK)
5537 		return status;
5538 
5539 	// directory is opened, create a fd
5540 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5541 		kernel);
5542 	if (status >= 0)
5543 		return status;
5544 
5545 	FS_CALL(vnode, close_attr_dir, cookie);
5546 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5547 
5548 	return status;
5549 }
5550 
5551 
5552 static int
5553 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5554 	int openMode, int perms, bool kernel)
5555 {
5556 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5557 		"kernel %d\n", name, openMode, perms, kernel));
5558 
5559 	// get directory to put the new file in
5560 	struct vnode* directory;
5561 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5562 	if (status != B_OK)
5563 		return status;
5564 
5565 	status = create_vnode(directory, name, openMode, perms, kernel);
5566 	put_vnode(directory);
5567 
5568 	return status;
5569 }
5570 
5571 
5572 static int
5573 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5574 {
5575 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5576 		openMode, perms, kernel));
5577 
5578 	// get directory to put the new file in
5579 	char name[B_FILE_NAME_LENGTH];
5580 	VnodePutter directory;
5581 	status_t status = fd_and_path_to_dir_vnode(fd, path, directory, name,
5582 		kernel);
5583 	if (status < 0)
5584 		return status;
5585 
5586 	return create_vnode(directory.Get(), name, openMode, perms, kernel);
5587 }
5588 
5589 
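/*!	Opens the entry "name" in the directory specified by mountID and
	directoryID. Symlinks are traversed unless O_NOTRAVERSE or O_NOFOLLOW
	is set in openMode.
	Returns a new file descriptor on success, an error code otherwise.
*/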
5590 static int
5591 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5592 	int openMode, bool kernel)
5593 {
5594 	if (name == NULL || *name == '\0')
5595 		return B_BAD_VALUE;
5596 
5597 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5598 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5599 
5600 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5601 
5602 	// get the vnode matching the entry_ref
5603 	VnodePutter vnode;
5604 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5605 		kernel, vnode);
5606 	if (status != B_OK)
5607 		return status;
5608 
5609 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5610 		return B_LINK_LIMIT;
5611 
5612 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5613 	if (newFD >= 0) {
5614 		cache_node_opened(vnode.Get(), FDTYPE_FILE, vnode->cache, mountID,
5615 			directoryID, vnode->id, name);
5616 
5617 		// The vnode reference has been transferred to the FD
5618 		vnode.Detach();
5619 	}
5620 
5621 	return newFD;
5622 }
5623 
5624 
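/*!	Opens the node specified by the fd + path pair.
	Returns a new file descriptor on success, an error code otherwise.
*/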
5625 static int
5626 file_open(int fd, char* path, int openMode, bool kernel)
5627 {
5628 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5629 
5630 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5631 		fd, path, openMode, kernel));
5632 
5633 	// get the vnode matching the vnode + path combination
5634 	VnodePutter vnode;
5635 	ino_t parentID;
5636 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode,
5637 		&parentID, kernel);
5638 	if (status != B_OK)
5639 		return status;
5640 
5641 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5642 		return B_LINK_LIMIT;
5643 
5644 	// open the vnode
5645 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5646 	if (newFD >= 0) {
5647 		cache_node_opened(vnode.Get(), FDTYPE_FILE, vnode->cache,
5648 			vnode->device, parentID, vnode->id, NULL);
5649 
5650 		// The vnode reference has been transferred to the FD
5651 		vnode.Detach();
5652 	}
5653 
5654 	return newFD;
5655 }
5656 
5657 
5658 static status_t
5659 file_close(struct file_descriptor* descriptor)
5660 {
5661 	struct vnode* vnode = descriptor->u.vnode;
5662 	status_t status = B_OK;
5663 
5664 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5665 
5666 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5667 		vnode->id);
5668 	if (HAS_FS_CALL(vnode, close)) {
5669 		status = FS_CALL(vnode, close, descriptor->cookie);
5670 	}
5671 
5672 	if (status == B_OK) {
5673 		// remove all outstanding locks for this team
5674 		if (HAS_FS_CALL(vnode, release_lock))
5675 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5676 		else
5677 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5678 	}
5679 	return status;
5680 }
5681 
5682 
5683 static void
5684 file_free_fd(struct file_descriptor* descriptor)
5685 {
5686 	struct vnode* vnode = descriptor->u.vnode;
5687 
5688 	if (vnode != NULL) {
5689 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5690 		put_vnode(vnode);
5691 	}
5692 }
5693 
5694 
5695 static status_t
5696 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5697 	size_t* length)
5698 {
5699 	struct vnode* vnode = descriptor->u.vnode;
5700 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5701 		pos, length, *length));
5702 
5703 	if (S_ISDIR(vnode->Type()))
5704 		return B_IS_A_DIRECTORY;
5705 	if (pos != -1 && descriptor->pos == -1)
5706 		return ESPIPE;
5707 
5708 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5709 }
5710 
5711 
5712 static status_t
5713 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5714 	size_t* length)
5715 {
5716 	struct vnode* vnode = descriptor->u.vnode;
5717 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5718 		length));
5719 
5720 	if (S_ISDIR(vnode->Type()))
5721 		return B_IS_A_DIRECTORY;
5722 	if (pos != -1 && descriptor->pos == -1)
5723 		return ESPIPE;
5724 
5725 	if (!HAS_FS_CALL(vnode, write))
5726 		return B_READ_ONLY_DEVICE;
5727 
5728 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5729 }
5730 
5731 
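/*!	Implements lseek(): computes the new file position according to
	seekType, including SEEK_DATA/SEEK_HOLE (via the FIOSEEKDATA and
	FIOSEEKHOLE ioctls, with a stat()-based fallback).
	Returns the new position, or an error code.
*/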
5732 static off_t
5733 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5734 {
5735 	struct vnode* vnode = descriptor->u.vnode;
5736 	off_t offset;
5737 	bool isDevice = false;
5738 
5739 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5740 		seekType));
5741 
5742 	if (descriptor->pos == -1)
5743 		return ESPIPE;
5744 
5745 	switch (vnode->Type() & S_IFMT) {
5746 		// drivers publish block devices as character devices, so check both
5747 		case S_IFBLK:
5748 		case S_IFCHR:
5749 			isDevice = true;
5750 			break;
5751 	}
5752 
5753 	switch (seekType) {
5754 		case SEEK_SET:
5755 			offset = 0;
5756 			break;
5757 		case SEEK_CUR:
5758 			offset = descriptor->pos;
5759 			break;
5760 		case SEEK_END:
5761 		{
5762 			// stat() the node
5763 			if (!HAS_FS_CALL(vnode, read_stat))
5764 				return B_UNSUPPORTED;
5765 
5766 			struct stat stat;
5767 			status_t status = FS_CALL(vnode, read_stat, &stat);
5768 			if (status != B_OK)
5769 				return status;
5770 
5771 			offset = stat.st_size;
5772 
5773 			if (offset == 0 && isDevice) {
5774 				// stat() on regular drivers doesn't report size
5775 				device_geometry geometry;
5776 
5777 				if (HAS_FS_CALL(vnode, ioctl)) {
5778 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5779 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5780 					if (status == B_OK)
5781 						offset = (off_t)geometry.bytes_per_sector
5782 							* geometry.sectors_per_track
5783 							* geometry.cylinder_count
5784 							* geometry.head_count;
5785 				}
5786 			}
5787 
5788 			break;
5789 		}
5790 		case SEEK_DATA:
5791 		case SEEK_HOLE:
5792 		{
5793 			status_t status = B_BAD_VALUE;
5794 			if (HAS_FS_CALL(vnode, ioctl)) {
5795 				offset = pos;
5796 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5797 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5798 					&offset, sizeof(offset));
5799 				if (status == B_OK) {
5800 					if (offset > pos)
5801 						offset -= pos;
5802 					break;
5803 				}
5804 			}
5805 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5806 				return status;
5807 
5808 			// fall back to a basic implementation using stat() on the node
5809 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5810 				return B_BAD_VALUE;
5811 
5812 			struct stat stat;
5813 			status = FS_CALL(vnode, read_stat, &stat);
5814 			if (status != B_OK)
5815 				return status;
5816 
5817 			off_t end = stat.st_size;
5818 			if (pos >= end)
5819 				return ENXIO;
5820 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5821 			break;
5822 		}
5823 		default:
5824 			return B_BAD_VALUE;
5825 	}
5826 
5827 	// assumes off_t is 64 bits wide
5828 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5829 		return B_BUFFER_OVERFLOW;
5830 
5831 	pos += offset;
5832 	if (pos < 0)
5833 		return B_BAD_VALUE;
5834 
5835 	return descriptor->pos = pos;
5836 }
5837 
5838 
5839 static status_t
5840 file_select(struct file_descriptor* descriptor, uint8 event,
5841 	struct selectsync* sync)
5842 {
5843 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5844 
5845 	struct vnode* vnode = descriptor->u.vnode;
5846 
5847 	// If the FS has no select() hook, notify select() now.
5848 	if (!HAS_FS_CALL(vnode, select)) {
5849 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5850 			return notify_select_event(sync, event);
5851 		else
5852 			return B_OK;
5853 	}
5854 
5855 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5856 }
5857 
5858 
5859 static status_t
5860 file_deselect(struct file_descriptor* descriptor, uint8 event,
5861 	struct selectsync* sync)
5862 {
5863 	struct vnode* vnode = descriptor->u.vnode;
5864 
5865 	if (!HAS_FS_CALL(vnode, deselect))
5866 		return B_OK;
5867 
5868 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5869 }
5870 
5871 
5872 static status_t
5873 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5874 	bool kernel)
5875 {
5876 	struct vnode* vnode;
5877 	status_t status;
5878 
5879 	if (name == NULL || *name == '\0')
5880 		return B_BAD_VALUE;
5881 
5882 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5883 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5884 
5885 	status = get_vnode(mountID, parentID, &vnode, true, false);
5886 	if (status != B_OK)
5887 		return status;
5888 
5889 	if (HAS_FS_CALL(vnode, create_dir))
5890 		status = FS_CALL(vnode, create_dir, name, perms);
5891 	else
5892 		status = B_READ_ONLY_DEVICE;
5893 
5894 	put_vnode(vnode);
5895 	return status;
5896 }
5897 
5898 
5899 static status_t
5900 dir_create(int fd, char* path, int perms, bool kernel)
5901 {
5902 	char filename[B_FILE_NAME_LENGTH];
5903 	status_t status;
5904 
5905 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5906 		kernel));
5907 
5908 	VnodePutter vnode;
5909 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
5910 	if (status < 0)
5911 		return status;
5912 
5913 	if (HAS_FS_CALL(vnode, create_dir))
5914 		status = FS_CALL(vnode.Get(), create_dir, filename, perms);
5915 	else
5916 		status = B_READ_ONLY_DEVICE;
5917 
5918 	return status;
5919 }
5920 
5921 
5922 static int
5923 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5924 {
5925 	FUNCTION(("dir_open_entry_ref()\n"));
5926 
5927 	if (name && name[0] == '\0')
5928 		return B_BAD_VALUE;
5929 
5930 	// get the vnode matching the entry_ref/node_ref
5931 	VnodePutter vnode;
5932 	status_t status;
5933 	if (name) {
5934 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5935 			vnode);
5936 	} else {
5937 		struct vnode* temp = NULL;
5938 		status = get_vnode(mountID, parentID, &temp, true, false);
5939 		vnode.SetTo(temp);
5940 	}
5941 	if (status != B_OK)
5942 		return status;
5943 
5944 	int newFD = open_dir_vnode(vnode.Get(), kernel);
5945 	if (newFD >= 0) {
5946 		cache_node_opened(vnode.Get(), FDTYPE_DIR, vnode->cache, mountID, parentID,
5947 			vnode->id, name);
5948 
5949 		// The vnode reference has been transferred to the FD
5950 		vnode.Detach();
5951 	}
5952 
5953 	return newFD;
5954 }
5955 
5956 
5957 static int
5958 dir_open(int fd, char* path, bool kernel)
5959 {
5960 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5961 		kernel));
5962 
5963 	// get the vnode matching the vnode + path combination
5964 	VnodePutter vnode;
5965 	ino_t parentID;
5966 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, &parentID,
5967 		kernel);
5968 	if (status != B_OK)
5969 		return status;
5970 
5971 	// open the dir
5972 	int newFD = open_dir_vnode(vnode.Get(), kernel);
5973 	if (newFD >= 0) {
5974 		cache_node_opened(vnode.Get(), FDTYPE_DIR, vnode->cache, vnode->device,
5975 			parentID, vnode->id, NULL);
5976 
5977 		// The vnode reference has been transferred to the FD
5978 		vnode.Detach();
5979 	}
5980 
5981 	return newFD;
5982 }
5983 
5984 
5985 static status_t
5986 dir_close(struct file_descriptor* descriptor)
5987 {
5988 	struct vnode* vnode = descriptor->u.vnode;
5989 
5990 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5991 
5992 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5993 		vnode->id);
5994 	if (HAS_FS_CALL(vnode, close_dir))
5995 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5996 
5997 	return B_OK;
5998 }
5999 
6000 
6001 static void
6002 dir_free_fd(struct file_descriptor* descriptor)
6003 {
6004 	struct vnode* vnode = descriptor->u.vnode;
6005 
6006 	if (vnode != NULL) {
6007 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6008 		put_vnode(vnode);
6009 	}
6010 }
6011 
6012 
6013 static status_t
6014 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6015 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6016 {
6017 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6018 		bufferSize, _count);
6019 }
6020 
6021 
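/*!	Fixes up a dirent as returned by a file system: fills in d_pdev and
	d_pino with the parent's IDs, and, for ".." entries of covering
	directories as well as for covered vnodes, replaces d_dev/d_ino with
	the IDs actually visible in the mount hierarchy.
*/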
6022 static status_t
6023 fix_dirent(struct vnode* parent, struct dirent* entry,
6024 	struct io_context* ioContext)
6025 {
6026 	// set d_pdev and d_pino
6027 	entry->d_pdev = parent->device;
6028 	entry->d_pino = parent->id;
6029 
6030 	// If this is the ".." entry and the directory is covering another vnode,
6031 	// we need to replace d_dev and d_ino with the actual values.
6032 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6033 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6034 			ioContext);
6035 	}
6036 
6037 	// resolve covered vnodes
6038 	ReadLocker _(&sVnodeLock);
6039 
6040 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6041 	if (vnode != NULL && vnode->covered_by != NULL) {
6042 		do {
6043 			vnode = vnode->covered_by;
6044 		} while (vnode->covered_by != NULL);
6045 
6046 		entry->d_dev = vnode->device;
6047 		entry->d_ino = vnode->id;
6048 	}
6049 
6050 	return B_OK;
6051 }
6052 
6053 
6054 static status_t
6055 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6056 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6057 {
6058 	if (!HAS_FS_CALL(vnode, read_dir))
6059 		return B_UNSUPPORTED;
6060 
6061 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6062 		_count);
6063 	if (error != B_OK)
6064 		return error;
6065 
6066 	// we need to adjust the read dirents
6067 	uint32 count = *_count;
6068 	for (uint32 i = 0; i < count; i++) {
6069 		error = fix_dirent(vnode, buffer, ioContext);
6070 		if (error != B_OK)
6071 			return error;
6072 
6073 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6074 	}
6075 
6076 	return error;
6077 }
6078 
6079 
6080 static status_t
6081 dir_rewind(struct file_descriptor* descriptor)
6082 {
6083 	struct vnode* vnode = descriptor->u.vnode;
6084 
6085 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6086 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6087 	}
6088 
6089 	return B_UNSUPPORTED;
6090 }
6091 
6092 
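/*!	Removes the directory specified by the fd + path pair. Trailing "/"
	and "." components are stripped from the path first; removing "." or
	".." itself is not allowed.
*/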
6093 static status_t
6094 dir_remove(int fd, char* path, bool kernel)
6095 {
6096 	char name[B_FILE_NAME_LENGTH];
6097 	status_t status;
6098 
6099 	if (path != NULL) {
6100 		// we need to make sure our path name doesn't end in "/", ".",
6101 		// or ".."
6102 		char* lastSlash;
6103 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6104 			char* leaf = lastSlash + 1;
6105 			if (!strcmp(leaf, ".."))
6106 				return B_NOT_ALLOWED;
6107 
6108 			// omit multiple slashes
6109 			while (lastSlash > path && lastSlash[-1] == '/')
6110 				lastSlash--;
6111 
6112 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
6113 				break;
6116 			// "name/" -> "name", or "name/." -> "name"
6117 			lastSlash[0] = '\0';
6118 		}
6119 
6120 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6121 			return B_NOT_ALLOWED;
6122 	}
6123 
6124 	VnodePutter directory;
6125 	status = fd_and_path_to_dir_vnode(fd, path, directory, name, kernel);
6126 	if (status != B_OK)
6127 		return status;
6128 
6129 	if (HAS_FS_CALL(directory, remove_dir))
6130 		status = FS_CALL(directory.Get(), remove_dir, name);
6131 	else
6132 		status = B_READ_ONLY_DEVICE;
6133 
6134 	return status;
6135 }
6136 
6137 
6138 static status_t
6139 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6140 	size_t length)
6141 {
6142 	struct vnode* vnode = descriptor->u.vnode;
6143 
6144 	if (HAS_FS_CALL(vnode, ioctl))
6145 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6146 
6147 	return B_DEV_INVALID_IOCTL;
6148 }
6149 
6150 
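/*!	Implements fcntl(): FD flags (F_SETFD/F_GETFD), open mode flags
	(F_SETFL/F_GETFL), descriptor duplication (F_DUPFD/F_DUPFD_CLOEXEC),
	and advisory locking (F_GETLK/F_SETLK/F_SETLKW).
*/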
6151 static status_t
6152 common_fcntl(int fd, int op, size_t argument, bool kernel)
6153 {
6154 	struct flock flock;
6155 
6156 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6157 		fd, op, argument, kernel ? "kernel" : "user"));
6158 
6159 	struct io_context* context = get_current_io_context(kernel);
6160 
6161 	FileDescriptorPutter descriptor(get_fd(context, fd));
6162 	if (!descriptor.IsSet())
6163 		return B_FILE_ERROR;
6164 
6165 	struct vnode* vnode = fd_vnode(descriptor.Get());
6166 
6167 	status_t status = B_OK;
6168 
6169 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6170 		if (descriptor->type != FDTYPE_FILE)
6171 			status = B_BAD_VALUE;
6172 		else if (kernel)
6173 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6174 		else if (user_memcpy(&flock, (struct flock*)argument,
6175 				sizeof(struct flock)) != B_OK)
6176 			status = B_BAD_ADDRESS;
6177 		if (status != B_OK)
6178 			return status;
6179 	}
6180 
6181 	switch (op) {
6182 		case F_SETFD:
6183 		{
6184 			// Set file descriptor flags
6185 
6186 			// FD_CLOEXEC is the only flag available at this time
6187 			mutex_lock(&context->io_mutex);
6188 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6189 			mutex_unlock(&context->io_mutex);
6190 
6191 			status = B_OK;
6192 			break;
6193 		}
6194 
6195 		case F_GETFD:
6196 		{
6197 			// Get file descriptor flags
6198 			mutex_lock(&context->io_mutex);
6199 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6200 			mutex_unlock(&context->io_mutex);
6201 			break;
6202 		}
6203 
6204 		case F_SETFL:
6205 			// Set file descriptor open mode
6206 
6207 			// we only accept changes to O_APPEND and O_NONBLOCK
6208 			argument &= O_APPEND | O_NONBLOCK;
6209 			if (descriptor->ops->fd_set_flags != NULL) {
6210 				status = descriptor->ops->fd_set_flags(descriptor.Get(), argument);
6211 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6212 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6213 					(int)argument);
6214 			} else
6215 				status = B_UNSUPPORTED;
6216 
6217 			if (status == B_OK) {
6218 				// update this descriptor's open_mode field
6219 				descriptor->open_mode = (descriptor->open_mode
6220 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6221 			}
6222 
6223 			break;
6224 
6225 		case F_GETFL:
6226 			// Get file descriptor open mode
6227 			status = descriptor->open_mode;
6228 			break;
6229 
6230 		case F_DUPFD:
6231 		case F_DUPFD_CLOEXEC:
6232 		{
6233 			status = new_fd_etc(context, descriptor.Get(), (int)argument);
6234 			if (status >= 0) {
6235 				mutex_lock(&context->io_mutex);
6236 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6237 				mutex_unlock(&context->io_mutex);
6238 
6239 				atomic_add(&descriptor->ref_count, 1);
6240 			}
6241 			break;
6242 		}
6243 
6244 		case F_GETLK:
6245 			if (vnode != NULL) {
6246 				struct flock normalizedLock;
6247 
6248 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6249 				status = normalize_flock(descriptor.Get(), &normalizedLock);
6250 				if (status != B_OK)
6251 					break;
6252 
6253 				if (HAS_FS_CALL(vnode, test_lock)) {
6254 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6255 						&normalizedLock);
6256 				} else
6257 					status = test_advisory_lock(vnode, &normalizedLock);
6258 				if (status == B_OK) {
6259 					if (normalizedLock.l_type == F_UNLCK) {
6260 						// no conflicting lock found; copy back the same struct
6261 						// we were given, with the type changed to F_UNLCK
6262 						flock.l_type = F_UNLCK;
6263 						if (kernel) {
6264 							memcpy((struct flock*)argument, &flock,
6265 								sizeof(struct flock));
6266 						} else {
6267 							status = user_memcpy((struct flock*)argument,
6268 								&flock, sizeof(struct flock));
6269 						}
6270 					} else {
6271 						// a conflicting lock was found, copy back its range and
6272 						// type
6273 						if (normalizedLock.l_len == OFF_MAX)
6274 							normalizedLock.l_len = 0;
6275 
6276 						if (kernel) {
6277 							memcpy((struct flock*)argument,
6278 								&normalizedLock, sizeof(struct flock));
6279 						} else {
6280 							status = user_memcpy((struct flock*)argument,
6281 								&normalizedLock, sizeof(struct flock));
6282 						}
6283 					}
6284 				}
6285 			} else
6286 				status = B_BAD_VALUE;
6287 			break;
6288 
6289 		case F_SETLK:
6290 		case F_SETLKW:
6291 			status = normalize_flock(descriptor.Get(), &flock);
6292 			if (status != B_OK)
6293 				break;
6294 
6295 			if (vnode == NULL) {
6296 				status = B_BAD_VALUE;
6297 			} else if (flock.l_type == F_UNLCK) {
6298 				if (HAS_FS_CALL(vnode, release_lock)) {
6299 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6300 						&flock);
6301 				} else {
6302 					status = release_advisory_lock(vnode, context, NULL,
6303 						&flock);
6304 				}
6305 			} else {
6306 				// the open mode must match the lock type
6307 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6308 						&& flock.l_type == F_WRLCK)
6309 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6310 						&& flock.l_type == F_RDLCK))
6311 					status = B_FILE_ERROR;
6312 				else {
6313 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6314 						status = FS_CALL(vnode, acquire_lock,
6315 							descriptor->cookie, &flock, op == F_SETLKW);
6316 					} else {
6317 						status = acquire_advisory_lock(vnode, context, NULL,
6318 							&flock, op == F_SETLKW);
6319 					}
6320 				}
6321 			}
6322 			break;
6323 
6324 		// ToDo: add support for more ops?
6325 
6326 		default:
6327 			status = B_BAD_VALUE;
6328 	}
6329 
6330 	return status;
6331 }
6332 
6333 
6334 static status_t
6335 common_sync(int fd, bool kernel)
6336 {
6337 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6338 
6339 	struct vnode* vnode;
6340 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6341 	if (!descriptor.IsSet())
6342 		return B_FILE_ERROR;
6343 
6344 	status_t status;
6345 	if (HAS_FS_CALL(vnode, fsync))
6346 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6347 	else
6348 		status = B_UNSUPPORTED;
6349 
6350 	return status;
6351 }
6352 
6353 
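/*!	"Locks" a node exclusively for the given descriptor by atomically
	setting the vnode's mandatory_locked_by field. Fails with B_BUSY if
	another descriptor already holds the lock.
*/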
6354 static status_t
6355 common_lock_node(int fd, bool kernel)
6356 {
6357 	struct vnode* vnode;
6358 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6359 	if (!descriptor.IsSet())
6360 		return B_FILE_ERROR;
6361 
6362 	status_t status = B_OK;
6363 
6364 	// We need to set the lock atomically - someone else
6365 	// might set one at the same time
6366 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6367 			descriptor.Get(), (file_descriptor*)NULL) != NULL)
6368 		status = B_BUSY;
6369 
6370 	return status;
6371 }
6372 
6373 
6374 static status_t
6375 common_unlock_node(int fd, bool kernel)
6376 {
6377 	struct vnode* vnode;
6378 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6379 	if (!descriptor.IsSet())
6380 		return B_FILE_ERROR;
6381 
6382 	status_t status = B_OK;
6383 
6384 	// We need to clear the lock atomically - someone else
6385 	// might set one at the same time
6386 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6387 			(file_descriptor*)NULL, descriptor.Get()) != descriptor.Get())
6388 		status = B_BAD_VALUE;
6389 
6390 	return status;
6391 }
6392 
6393 
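/*!	Preallocates storage for the file referred to by the given FD, which
	must be open for writing. Only regular files are supported; FIFOs and
	sockets yield ESPIPE, other special nodes B_DEVICE_NOT_FOUND.
*/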
6394 static status_t
6395 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6396 {
6397 	if (offset < 0 || length == 0)
6398 		return B_BAD_VALUE;
6399 	if (offset > OFF_MAX - length)
6400 		return B_FILE_TOO_LARGE;
6401 
6402 	struct vnode* vnode;
6403 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6404 	if (!descriptor.IsSet() || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6405 		return B_FILE_ERROR;
6406 
6407 	switch (vnode->Type() & S_IFMT) {
6408 		case S_IFIFO:
6409 		case S_IFSOCK:
6410 			return ESPIPE;
6411 
6412 		case S_IFBLK:
6413 		case S_IFCHR:
6414 		case S_IFDIR:
6415 		case S_IFLNK:
6416 			return B_DEVICE_NOT_FOUND;
6417 
6418 		case S_IFREG:
6419 			break;
6420 	}
6421 
6422 	status_t status = B_OK;
6423 	if (HAS_FS_CALL(vnode, preallocate)) {
6424 		status = FS_CALL(vnode, preallocate, offset, length);
6425 	} else {
6426 		status = HAS_FS_CALL(vnode, write)
6427 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6428 	}
6429 
6430 	return status;
6431 }
6432 
6433 
6434 static status_t
6435 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6436 	bool kernel)
6437 {
6438 	VnodePutter vnode;
6439 	status_t status;
6440 
6441 	status = fd_and_path_to_vnode(fd, path, false, vnode, NULL, kernel);
6442 	if (status != B_OK)
6443 		return status;
6444 
6445 	if (HAS_FS_CALL(vnode, read_symlink))
6446 		status = FS_CALL(vnode.Get(), read_symlink, buffer, _bufferSize);
6447 	else
6448 		status = B_BAD_VALUE;
6449 
6450 	return status;
6451 }
6452 
6453 
6454 static status_t
6455 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6456 	bool kernel)
6457 {
6458 	// path validity checks have to be in the calling function!
6459 	char name[B_FILE_NAME_LENGTH];
6460 	status_t status;
6461 
6462 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6463 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6464 
6465 	VnodePutter vnode;
6466 	status = fd_and_path_to_dir_vnode(fd, path, vnode, name, kernel);
6467 	if (status != B_OK)
6468 		return status;
6469 
6470 	if (HAS_FS_CALL(vnode, create_symlink))
6471 		status = FS_CALL(vnode.Get(), create_symlink, name, toPath, mode);
6472 	else {
6473 		status = HAS_FS_CALL(vnode, write)
6474 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6475 	}
6476 
6477 	return status;
6478 }
6479 
6480 
6481 static status_t
6482 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6483 	bool traverseLeafLink, bool kernel)
6484 {
6485 	// path validity checks have to be in the calling function!
6486 
6487 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6488 		toPath, kernel));
6489 
6490 	char name[B_FILE_NAME_LENGTH];
6491 	VnodePutter directory;
6492 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, directory, name,
6493 		kernel);
6494 	if (status != B_OK)
6495 		return status;
6496 
6497 	VnodePutter vnode;
6498 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, vnode, NULL,
6499 		kernel);
6500 	if (status != B_OK)
6501 		return status;
6502 
6503 	if (directory->mount != vnode->mount)
6504 		return B_CROSS_DEVICE_LINK;
6505 
6506 	if (HAS_FS_CALL(directory, link))
6507 		status = FS_CALL(directory.Get(), link, name, vnode.Get());
6508 	else
6509 		status = B_READ_ONLY_DEVICE;
6510 
6511 	return status;
6512 }
6513 
6514 
6515 static status_t
6516 common_unlink(int fd, char* path, bool kernel)
6517 {
6518 	char filename[B_FILE_NAME_LENGTH];
6519 	status_t status;
6520 
6521 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6522 		kernel));
6523 
6524 	VnodePutter vnode;
6525 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
6526 	if (status < 0)
6527 		return status;
6528 
6529 	if (HAS_FS_CALL(vnode, unlink))
6530 		status = FS_CALL(vnode.Get(), unlink, filename);
6531 	else
6532 		status = B_READ_ONLY_DEVICE;
6533 
6534 	return status;
6535 }
6536 
6537 
6538 static status_t
6539 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6540 {
6541 	status_t status;
6542 
6543 	// TODO: honor effectiveUserGroup argument
6544 
6545 	VnodePutter vnode;
6546 	status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
6547 	if (status != B_OK)
6548 		return status;
6549 
6550 	if (HAS_FS_CALL(vnode, access))
6551 		status = FS_CALL(vnode.Get(), access, mode);
6552 	else
6553 		status = B_OK;
6554 
6555 	return status;
6556 }
6557 
6558 
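/*!	Renames the entry specified by fd + path to newFD + newPath. Both
	entries must reside on the same volume, and none of the names may be
	empty, ".", or "..".
*/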
6559 static status_t
6560 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6561 {
6562 	status_t status;
6563 
6564 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6565 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6566 
6567 	VnodePutter fromVnode;
6568 	char fromName[B_FILE_NAME_LENGTH];
6569 	status = fd_and_path_to_dir_vnode(fd, path, fromVnode, fromName, kernel);
6570 	if (status != B_OK)
6571 		return status;
6572 
6573 	VnodePutter toVnode;
6574 	char toName[B_FILE_NAME_LENGTH];
6575 	status = fd_and_path_to_dir_vnode(newFD, newPath, toVnode, toName, kernel);
6576 	if (status != B_OK)
6577 		return status;
6578 
6579 	if (fromVnode->device != toVnode->device)
6580 		return B_CROSS_DEVICE_LINK;
6581 
6582 	if (fromVnode.Get() == toVnode.Get() && !strcmp(fromName, toName))
6583 		return B_OK;
6584 
6585 	if (fromName[0] == '\0' || toName[0] == '\0'
6586 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6587 		|| !strcmp(toName, ".") || !strcmp(toName, "..")) {
6588 		return B_BAD_VALUE;
6589 	}
6590 
6591 	if (HAS_FS_CALL(fromVnode, rename))
6592 		status = FS_CALL(fromVnode.Get(), rename, fromName, toVnode.Get(), toName);
6593 	else
6594 		status = B_READ_ONLY_DEVICE;
6595 
6596 	return status;
6597 }
6598 
6599 
6600 static status_t
6601 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6602 {
6603 	struct vnode* vnode = descriptor->u.vnode;
6604 
6605 	FUNCTION(("common_read_stat: stat %p\n", stat));
6606 
6607 	// TODO: remove this once all file systems properly set them!
6608 	stat->st_crtim.tv_nsec = 0;
6609 	stat->st_ctim.tv_nsec = 0;
6610 	stat->st_mtim.tv_nsec = 0;
6611 	stat->st_atim.tv_nsec = 0;
6612 
6613 	return vfs_stat_vnode(vnode, stat);
6614 }
6615 
6616 
6617 static status_t
6618 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6619 	int statMask)
6620 {
6621 	struct vnode* vnode = descriptor->u.vnode;
6622 
6623 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6624 		vnode, stat, statMask));
6625 
6626 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6627 		&& (statMask & B_STAT_SIZE) != 0) {
6628 		return B_BAD_VALUE;
6629 	}
6630 
6631 	if (!HAS_FS_CALL(vnode, write_stat))
6632 		return B_READ_ONLY_DEVICE;
6633 
6634 	return FS_CALL(vnode, write_stat, stat, statMask);
6635 }
6636 
6637 
6638 static status_t
6639 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6640 	struct stat* stat, bool kernel)
6641 {
6642 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6643 		stat));
6644 
6645 	VnodePutter vnode;
6646 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6647 		NULL, kernel);
6648 	if (status != B_OK)
6649 		return status;
6650 
6651 	status = vfs_stat_vnode(vnode.Get(), stat);
6652 
6653 	return status;
6654 }
6655 
6656 
6657 static status_t
6658 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6659 	const struct stat* stat, int statMask, bool kernel)
6660 {
6661 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6662 		"kernel %d\n", fd, path, stat, statMask, kernel));
6663 
6664 	VnodePutter vnode;
6665 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6666 		NULL, kernel);
6667 	if (status != B_OK)
6668 		return status;
6669 
6670 	if (HAS_FS_CALL(vnode, write_stat))
6671 		status = FS_CALL(vnode.Get(), write_stat, stat, statMask);
6672 	else
6673 		status = B_READ_ONLY_DEVICE;
6674 
6675 	return status;
6676 }
6677 
6678 
6679 static int
6680 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6681 {
6682 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6683 		kernel));
6684 
6685 	VnodePutter vnode;
6686 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6687 		NULL, kernel);
6688 	if (status != B_OK)
6689 		return status;
6690 
6691 	status = open_attr_dir_vnode(vnode.Get(), kernel);
6692 	if (status >= 0)
6693 		vnode.Detach();
6694 
6695 	return status;
6696 }
6697 
6698 
6699 static status_t
6700 attr_dir_close(struct file_descriptor* descriptor)
6701 {
6702 	struct vnode* vnode = descriptor->u.vnode;
6703 
6704 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6705 
6706 	if (HAS_FS_CALL(vnode, close_attr_dir))
6707 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6708 
6709 	return B_OK;
6710 }
6711 
6712 
6713 static void
6714 attr_dir_free_fd(struct file_descriptor* descriptor)
6715 {
6716 	struct vnode* vnode = descriptor->u.vnode;
6717 
6718 	if (vnode != NULL) {
6719 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6720 		put_vnode(vnode);
6721 	}
6722 }
6723 
6724 
6725 static status_t
6726 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6727 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6728 {
6729 	struct vnode* vnode = descriptor->u.vnode;
6730 
6731 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6732 
6733 	if (HAS_FS_CALL(vnode, read_attr_dir))
6734 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6735 			bufferSize, _count);
6736 
6737 	return B_UNSUPPORTED;
6738 }
6739 
6740 
6741 static status_t
6742 attr_dir_rewind(struct file_descriptor* descriptor)
6743 {
6744 	struct vnode* vnode = descriptor->u.vnode;
6745 
6746 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6747 
6748 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6749 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6750 
6751 	return B_UNSUPPORTED;
6752 }
6753 
6754 
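/*!	Creates and opens the attribute "name" of the given type on the node
	specified by the fd + path pair.
	Returns a new file descriptor on success, an error code otherwise.
*/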
6755 static int
6756 attr_create(int fd, char* path, const char* name, uint32 type,
6757 	int openMode, bool kernel)
6758 {
6759 	if (name == NULL || *name == '\0')
6760 		return B_BAD_VALUE;
6761 
6762 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6763 	VnodePutter vnode;
6764 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6765 		kernel);
6766 	if (status != B_OK)
6767 		return status;
6768 
6769 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6770 		return B_LINK_LIMIT;
6771 
6772 	if (!HAS_FS_CALL(vnode, create_attr))
6773 		return B_READ_ONLY_DEVICE;
6774 
6775 	void* cookie;
6776 	status = FS_CALL(vnode.Get(), create_attr, name, type, openMode, &cookie);
6777 	if (status != B_OK)
6778 		return status;
6779 
6780 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode.Get(), cookie, openMode, kernel);
6781 	if (fd >= 0) {
6782 		vnode.Detach();
6783 		return fd;
6784 	}
6785 
6786 	status = fd;
6787 
6788 	FS_CALL(vnode.Get(), close_attr, cookie);
6789 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6790 
6791 	FS_CALL(vnode.Get(), remove_attr, name);
6792 
6793 	return status;
6794 }
6795 
6796 
6797 static int
6798 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6799 {
6800 	if (name == NULL || *name == '\0')
6801 		return B_BAD_VALUE;
6802 
6803 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6804 	VnodePutter vnode;
6805 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6806 		kernel);
6807 	if (status != B_OK)
6808 		return status;
6809 
6810 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6811 		return B_LINK_LIMIT;
6812 
6813 	if (!HAS_FS_CALL(vnode, open_attr))
6814 		return B_UNSUPPORTED;
6815 
6816 	void* cookie;
6817 	status = FS_CALL(vnode.Get(), open_attr, name, openMode, &cookie);
6818 	if (status != B_OK)
6819 		return status;
6820 
6821 	// now we only need a file descriptor for this attribute and we're done
6822 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode.Get(), cookie, openMode, kernel);
6823 	if (fd >= 0) {
6824 		vnode.Detach();
6825 		return fd;
6826 	}
6827 
6828 	status = fd;
6829 
6830 	FS_CALL(vnode.Get(), close_attr, cookie);
6831 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6832 
6833 	return status;
6834 }
6835 
6836 
6837 static status_t
6838 attr_close(struct file_descriptor* descriptor)
6839 {
6840 	struct vnode* vnode = descriptor->u.vnode;
6841 
6842 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6843 
6844 	if (HAS_FS_CALL(vnode, close_attr))
6845 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6846 
6847 	return B_OK;
6848 }
6849 
6850 
6851 static void
6852 attr_free_fd(struct file_descriptor* descriptor)
6853 {
6854 	struct vnode* vnode = descriptor->u.vnode;
6855 
6856 	if (vnode != NULL) {
6857 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6858 		put_vnode(vnode);
6859 	}
6860 }
6861 
6862 
6863 static status_t
6864 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6865 	size_t* length)
6866 {
6867 	struct vnode* vnode = descriptor->u.vnode;
6868 
6869 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6870 		pos, length, *length));
6871 
6872 	if (!HAS_FS_CALL(vnode, read_attr))
6873 		return B_UNSUPPORTED;
6874 
6875 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6876 }
6877 
6878 
6879 static status_t
6880 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6881 	size_t* length)
6882 {
6883 	struct vnode* vnode = descriptor->u.vnode;
6884 
6885 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6886 		length));
6887 
6888 	if (!HAS_FS_CALL(vnode, write_attr))
6889 		return B_UNSUPPORTED;
6890 
6891 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6892 }
6893 
6894 
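/*!	Implements lseek() for attribute descriptors; SEEK_END is resolved
	via the FS read_attr_stat() hook.
	Returns the new position, or an error code.
*/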
6895 static off_t
6896 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6897 {
6898 	off_t offset;
6899 
6900 	switch (seekType) {
6901 		case SEEK_SET:
6902 			offset = 0;
6903 			break;
6904 		case SEEK_CUR:
6905 			offset = descriptor->pos;
6906 			break;
6907 		case SEEK_END:
6908 		{
6909 			struct vnode* vnode = descriptor->u.vnode;
6910 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6911 				return B_UNSUPPORTED;
6912 
6913 			struct stat stat;
6914 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6915 				&stat);
6916 			if (status != B_OK)
6917 				return status;
6918 
6919 			offset = stat.st_size;
6920 			break;
6921 		}
6922 		default:
6923 			return B_BAD_VALUE;
6924 	}
6925 
6926 	// assumes off_t is 64 bits wide
6927 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6928 		return B_BUFFER_OVERFLOW;
6929 
6930 	pos += offset;
6931 	if (pos < 0)
6932 		return B_BAD_VALUE;
6933 
6934 	return descriptor->pos = pos;
6935 }
6936 
6937 
6938 static status_t
6939 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6940 {
6941 	struct vnode* vnode = descriptor->u.vnode;
6942 
6943 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6944 
6945 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6946 		return B_UNSUPPORTED;
6947 
6948 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6949 }
6950 
6951 
6952 static status_t
6953 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6954 	int statMask)
6955 {
6956 	struct vnode* vnode = descriptor->u.vnode;
6957 
6958 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6959 
6960 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6961 		return B_READ_ONLY_DEVICE;
6962 
6963 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6964 }
6965 
6966 
6967 static status_t
6968 attr_remove(int fd, const char* name, bool kernel)
6969 {
6970 	if (name == NULL || *name == '\0')
6971 		return B_BAD_VALUE;
6972 
6973 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6974 		kernel));
6975 
6976 	struct vnode* vnode;
6977 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6978 	if (!descriptor.IsSet())
6979 		return B_FILE_ERROR;
6980 
6981 	status_t status;
6982 	if (HAS_FS_CALL(vnode, remove_attr))
6983 		status = FS_CALL(vnode, remove_attr, name);
6984 	else
6985 		status = B_READ_ONLY_DEVICE;
6986 
6987 	return status;
6988 }
6989 
6990 
6991 static status_t
6992 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6993 	bool kernel)
6994 {
6995 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6996 		|| *toName == '\0')
6997 		return B_BAD_VALUE;
6998 
6999 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7000 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7001 
7002 	struct vnode* fromVnode;
7003 	FileDescriptorPutter fromDescriptor(get_fd_and_vnode(fromFD, &fromVnode, kernel));
7004 	if (!fromDescriptor.IsSet())
7005 		return B_FILE_ERROR;
7006 
7007 	struct vnode* toVnode;
7008 	FileDescriptorPutter toDescriptor(get_fd_and_vnode(toFD, &toVnode, kernel));
7009 	if (!toDescriptor.IsSet())
7010 		return B_FILE_ERROR;
7011 
7012 	// are the files on the same volume?
7013 	if (fromVnode->device != toVnode->device)
7014 		return B_CROSS_DEVICE_LINK;
7015 
7016 	status_t status;
7017 	if (HAS_FS_CALL(fromVnode, rename_attr))
7018 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7019 	else
7020 		status = B_READ_ONLY_DEVICE;
7021 
7022 	return status;
7023 }
7024 
7025 
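/*!	Opens the index directory of the volume specified by mountID.
	Returns a new file descriptor on success, an error code otherwise.
*/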
7026 static int
7027 index_dir_open(dev_t mountID, bool kernel)
7028 {
7029 	struct fs_mount* mount;
7030 	void* cookie;
7031 
7032 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7033 		kernel));
7034 
7035 	status_t status = get_mount(mountID, &mount);
7036 	if (status != B_OK)
7037 		return status;
7038 
7039 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7040 		status = B_UNSUPPORTED;
7041 		goto error;
7042 	}
7043 
7044 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7045 	if (status != B_OK)
7046 		goto error;
7047 
7048 	// get fd for the index directory
7049 	int fd;
7050 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7051 	if (fd >= 0)
7052 		return fd;
7053 
7054 	// something went wrong
7055 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7056 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7057 
7058 	status = fd;
7059 
7060 error:
7061 	put_mount(mount);
7062 	return status;
7063 }
7064 
7065 
7066 static status_t
7067 index_dir_close(struct file_descriptor* descriptor)
7068 {
7069 	struct fs_mount* mount = descriptor->u.mount;
7070 
7071 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7072 
7073 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7074 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7075 
7076 	return B_OK;
7077 }
7078 
7079 
7080 static void
7081 index_dir_free_fd(struct file_descriptor* descriptor)
7082 {
7083 	struct fs_mount* mount = descriptor->u.mount;
7084 
7085 	if (mount != NULL) {
7086 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7087 		put_mount(mount);
7088 	}
7089 }
7090 
7091 
7092 static status_t
7093 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7094 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7095 {
7096 	struct fs_mount* mount = descriptor->u.mount;
7097 
7098 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7099 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7100 			bufferSize, _count);
7101 	}
7102 
7103 	return B_UNSUPPORTED;
7104 }
7105 
7106 
7107 static status_t
7108 index_dir_rewind(struct file_descriptor* descriptor)
7109 {
7110 	struct fs_mount* mount = descriptor->u.mount;
7111 
7112 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7113 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7114 
7115 	return B_UNSUPPORTED;
7116 }
7117 
7118 
7119 static status_t
7120 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7121 	bool kernel)
7122 {
7123 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7124 		mountID, name, kernel));
7125 
7126 	struct fs_mount* mount;
7127 	status_t status = get_mount(mountID, &mount);
7128 	if (status != B_OK)
7129 		return status;
7130 
7131 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7132 		status = B_READ_ONLY_DEVICE;
7133 		goto out;
7134 	}
7135 
7136 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7137 
7138 out:
7139 	put_mount(mount);
7140 	return status;
7141 }
7142 
7143 
7144 #if 0
7145 static status_t
7146 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7147 {
7148 	struct vnode* vnode = descriptor->u.vnode;
7149 
7150 	// ToDo: currently unused!
7151 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7152 	if (!HAS_FS_CALL(vnode, read_index_stat))
7153 		return B_UNSUPPORTED;
7154 
7155 	return B_UNSUPPORTED;
7156 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7157 }
7158 
7159 
7160 static void
7161 index_free_fd(struct file_descriptor* descriptor)
7162 {
7163 	struct vnode* vnode = descriptor->u.vnode;
7164 
7165 	if (vnode != NULL) {
7166 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7167 		put_vnode(vnode);
7168 	}
7169 }
7170 #endif
7171 
7172 
7173 static status_t
7174 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7175 	bool kernel)
7176 {
7177 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7178 		mountID, name, kernel));
7179 
7180 	struct fs_mount* mount;
7181 	status_t status = get_mount(mountID, &mount);
7182 	if (status != B_OK)
7183 		return status;
7184 
7185 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7186 		status = B_UNSUPPORTED;
7187 		goto out;
7188 	}
7189 
7190 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7191 
7192 out:
7193 	put_mount(mount);
7194 	return status;
7195 }
7196 
7197 
7198 static status_t
7199 index_remove(dev_t mountID, const char* name, bool kernel)
7200 {
7201 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7202 		mountID, name, kernel));
7203 
7204 	struct fs_mount* mount;
7205 	status_t status = get_mount(mountID, &mount);
7206 	if (status != B_OK)
7207 		return status;
7208 
7209 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7210 		status = B_READ_ONLY_DEVICE;
7211 		goto out;
7212 	}
7213 
7214 	status = FS_MOUNT_CALL(mount, remove_index, name);
7215 
7216 out:
7217 	put_mount(mount);
7218 	return status;
7219 }
7220 
7221 
7222 /*!	TODO: the query FS API is still pretty much the same as in R5.
7223 		It would be nice if queries would get some more kernel support;
7224 		for example, query parsing should be moved into the kernel.
7225 */
7227 static int
7228 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7229 	int32 token, bool kernel)
7230 {
7231 	struct fs_mount* mount;
7232 	void* cookie;
7233 
7234 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7235 		device, query, kernel));
7236 
7237 	status_t status = get_mount(device, &mount);
7238 	if (status != B_OK)
7239 		return status;
7240 
7241 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7242 		status = B_UNSUPPORTED;
7243 		goto error;
7244 	}
7245 
7246 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7247 		&cookie);
7248 	if (status != B_OK)
7249 		goto error;
7250 
7251 	// get fd for the query
7252 	int fd;
7253 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7254 	if (fd >= 0)
7255 		return fd;
7256 
7257 	status = fd;
7258 
7259 	// something went wrong
7260 	FS_MOUNT_CALL(mount, close_query, cookie);
7261 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7262 
7263 error:
7264 	put_mount(mount);
7265 	return status;
7266 }
7267 
7268 
7269 static status_t
7270 query_close(struct file_descriptor* descriptor)
7271 {
7272 	struct fs_mount* mount = descriptor->u.mount;
7273 
7274 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7275 
7276 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7277 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7278 
7279 	return B_OK;
7280 }
7281 
7282 
7283 static void
7284 query_free_fd(struct file_descriptor* descriptor)
7285 {
7286 	struct fs_mount* mount = descriptor->u.mount;
7287 
7288 	if (mount != NULL) {
7289 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7290 		put_mount(mount);
7291 	}
7292 }
7293 
7294 
7295 static status_t
7296 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7297 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7298 {
7299 	struct fs_mount* mount = descriptor->u.mount;
7300 
7301 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7302 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7303 			bufferSize, _count);
7304 	}
7305 
7306 	return B_UNSUPPORTED;
7307 }
7308 
7309 
7310 static status_t
7311 query_rewind(struct file_descriptor* descriptor)
7312 {
7313 	struct fs_mount* mount = descriptor->u.mount;
7314 
7315 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7316 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7317 
7318 	return B_UNSUPPORTED;
7319 }
7320 
7321 
7322 //	#pragma mark - General File System functions
7323 
7324 
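/*!	Mounts the file system "fsName" (optionally backed by "device") at the
	given path. If fsName is NULL, it is determined via the disk device
	manager.
	Returns the ID of the new mount on success, an error code otherwise.
*/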
7325 static dev_t
7326 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7327 	const char* args, bool kernel)
7328 {
7329 	struct ::fs_mount* mount;
7330 	status_t status = B_OK;
7331 	fs_volume* volume = NULL;
7332 	int32 layer = 0;
7333 	Vnode* coveredNode = NULL;
7334 
7335 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7336 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7337 
7338 	// The path is always safe; we just have to make sure that fsName is
7339 	// at least mostly valid - we can't make any assumptions about args,
7340 	// though. A NULL fsName is OK if a device was given and the FS is not
7341 	// virtual; we'll get it from the DDM later.
7342 	if (fsName == NULL) {
7343 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7344 			return B_BAD_VALUE;
7345 	} else if (fsName[0] == '\0')
7346 		return B_BAD_VALUE;
7347 
7348 	RecursiveLocker mountOpLocker(sMountOpLock);
7349 
7350 	// Helper to delete a newly created file device on failure.
7351 	// Not exactly beautiful, but helps to keep the code below cleaner.
7352 	struct FileDeviceDeleter {
7353 		FileDeviceDeleter() : id(-1) {}
7354 		~FileDeviceDeleter()
7355 		{
7356 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7357 		}
7358 
7359 		partition_id id;
7360 	} fileDeviceDeleter;
7361 
7362 	// If the file system is not a "virtual" one, the device argument should
7363 	// point to a real file/device (if given at all).
7364 	// get the partition
7365 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7366 	KPartition* partition = NULL;
7367 	KPath normalizedDevice;
7368 	bool newlyCreatedFileDevice = false;
7369 
7370 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7371 		// normalize the device path
7372 		status = normalizedDevice.SetTo(device, true);
7373 		if (status != B_OK)
7374 			return status;
7375 
7376 		// get a corresponding partition from the DDM
7377 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7378 		if (partition == NULL) {
7379 			// Partition not found: This either means the user supplied
7380 			// an invalid path, or the path refers to an image file. We try
7381 			// to let the DDM create a file device for the path.
7382 			partition_id deviceID = ddm->CreateFileDevice(
7383 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7384 			if (deviceID >= 0) {
7385 				partition = ddm->RegisterPartition(deviceID);
7386 				if (newlyCreatedFileDevice)
7387 					fileDeviceDeleter.id = deviceID;
7388 			}
7389 		}
7390 
7391 		if (!partition) {
7392 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7393 				normalizedDevice.Path()));
7394 			return B_ENTRY_NOT_FOUND;
7395 		}
7396 
7397 		device = normalizedDevice.Path();
7398 			// correct path to file device
7399 	}
7400 	PartitionRegistrar partitionRegistrar(partition, true);
7401 
7402 	// Write lock the partition's device. For the time being, we keep the lock
7403 	// until we're done mounting -- not nice, but it ensures that no one
7404 	// is interfering.
7405 	// TODO: Just mark the partition busy while mounting!
7406 	KDiskDevice* diskDevice = NULL;
7407 	if (partition) {
7408 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7409 		if (!diskDevice) {
7410 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7411 			return B_ERROR;
7412 		}
7413 	}
7414 
7415 	DeviceWriteLocker writeLocker(diskDevice, true);
7416 		// this takes over the write lock acquired before
7417 
7418 	if (partition != NULL) {
7419 		// make sure that the partition is not busy
7420 		if (partition->IsBusy()) {
7421 			TRACE(("fs_mount(): Partition is busy.\n"));
7422 			return B_BUSY;
7423 		}
7424 
7425 		// if no FS name had been supplied, we get it from the partition
7426 		// if no FS name has been supplied, we get it from the partition
7427 			KDiskSystem* diskSystem = partition->DiskSystem();
7428 			if (!diskSystem) {
7429 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7430 					"recognize it.\n"));
7431 				return B_BAD_VALUE;
7432 			}
7433 
7434 			if (!diskSystem->IsFileSystem()) {
7435 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7436 					"partitioning system.\n"));
7437 				return B_BAD_VALUE;
7438 			}
7439 
7440 			// The disk system name will not change, and the KDiskSystem
7441 			// object will not go away while the disk device is locked (and
7442 			// the partition has a reference to it), so this is safe.
7443 			fsName = diskSystem->Name();
7444 		}
7445 	}
7446 
7447 	mount = new(std::nothrow) (struct ::fs_mount);
7448 	if (mount == NULL)
7449 		return B_NO_MEMORY;
7450 
7451 	mount->device_name = strdup(device);
7452 		// "device" can be NULL
7453 
7454 	status = mount->entry_cache.Init();
7455 	if (status != B_OK)
7456 		goto err1;
7457 
7458 	// initialize structure
7459 	mount->id = sNextMountID++;
7460 	mount->partition = NULL;
7461 	mount->root_vnode = NULL;
7462 	mount->covers_vnode = NULL;
7463 	mount->unmounting = false;
7464 	mount->owns_file_device = false;
7465 	mount->volume = NULL;
7466 
7467 	// build up the volume(s)
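	// For layered file systems, one fs_volume is created per layer; the
	// layers are linked via their super_volume/sub_volume fields, and
	// mount->volume always ends up pointing to the top-most layer.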
7468 	while (true) {
7469 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7470 		if (layerFSName == NULL) {
7471 			if (layer == 0) {
7472 				status = B_NO_MEMORY;
7473 				goto err1;
7474 			}
7475 
7476 			break;
7477 		}
7478 		MemoryDeleter layerFSNameDeleter(layerFSName);
7479 
7480 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7481 		if (volume == NULL) {
7482 			status = B_NO_MEMORY;
7483 			goto err1;
7484 		}
7485 
7486 		volume->id = mount->id;
7487 		volume->partition = partition != NULL ? partition->ID() : -1;
7488 		volume->layer = layer++;
7489 		volume->private_volume = NULL;
7490 		volume->ops = NULL;
7491 		volume->sub_volume = NULL;
7492 		volume->super_volume = NULL;
7493 		volume->file_system = NULL;
7494 		volume->file_system_name = NULL;
7495 
7496 		volume->file_system_name = get_file_system_name(layerFSName);
7497 		if (volume->file_system_name == NULL) {
7498 			status = B_NO_MEMORY;
7499 			free(volume);
7500 			goto err1;
7501 		}
7502 
7503 		volume->file_system = get_file_system(layerFSName);
7504 		if (volume->file_system == NULL) {
7505 			status = B_DEVICE_NOT_FOUND;
7506 			free(volume->file_system_name);
7507 			free(volume);
7508 			goto err1;
7509 		}
7510 
7511 		if (mount->volume == NULL)
7512 			mount->volume = volume;
7513 		else {
7514 			volume->super_volume = mount->volume;
7515 			mount->volume->sub_volume = volume;
7516 			mount->volume = volume;
7517 		}
7518 	}
7519 
7520 	// insert mount struct into list before we call FS's mount() function
7521 	// so that vnodes can be created for this mount
7522 	rw_lock_write_lock(&sMountLock);
7523 	sMountsTable->Insert(mount);
7524 	rw_lock_write_unlock(&sMountLock);
7525 
7526 	ino_t rootID;
7527 
7528 	if (!sRoot) {
7529 		// we haven't mounted anything yet
7530 		if (strcmp(path, "/") != 0) {
7531 			status = B_ERROR;
7532 			goto err2;
7533 		}
7534 
7535 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7536 			args, &rootID);
7537 		if (status != B_OK || mount->volume->ops == NULL)
7538 			goto err2;
7539 	} else {
7540 		{
7541 			VnodePutter temp;
7542 			status = path_to_vnode(path, true, temp, NULL, kernel);
7543 			coveredNode = temp.Detach();
7544 		}
7545 		if (status != B_OK)
7546 			goto err2;
7547 
7548 		mount->covers_vnode = coveredNode;
7549 
7550 		// make sure covered_vnode is a directory
7551 		if (!S_ISDIR(coveredNode->Type())) {
7552 			status = B_NOT_A_DIRECTORY;
7553 			goto err3;
7554 		}
7555 
7556 		if (coveredNode->IsCovered()) {
7557 			// this is already a covered vnode
7558 			status = B_BUSY;
7559 			goto err3;
7560 		}
7561 
7562 		// mount it/them
7563 		fs_volume* volume = mount->volume;
7564 		while (volume) {
7565 			status = volume->file_system->mount(volume, device, flags, args,
7566 				&rootID);
7567 			if (status != B_OK || volume->ops == NULL) {
7568 				if (status == B_OK && volume->ops == NULL)
7569 					panic("fs_mount: mount() succeeded but ops is NULL!");
7570 				if (volume->sub_volume)
7571 					goto err4;
7572 				goto err3;
7573 			}
7574 
7575 			volume = volume->super_volume;
7576 		}
7577 
7578 		volume = mount->volume;
7579 		while (volume) {
7580 			if (volume->ops->all_layers_mounted != NULL)
7581 				volume->ops->all_layers_mounted(volume);
7582 			volume = volume->super_volume;
7583 		}
7584 	}
7585 
7586 	// the root node is supposed to be owned by the file system - it must
7587 	// exist at this point
7588 	rw_lock_write_lock(&sVnodeLock);
7589 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7590 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7591 		panic("fs_mount: file system does not own its root node!\n");
7592 		status = B_ERROR;
7593 		rw_lock_write_unlock(&sVnodeLock);
7594 		goto err4;
7595 	}
7596 
7597 	// set up the links between the root vnode and the vnode it covers
7598 	if (coveredNode != NULL) {
7599 		if (coveredNode->IsCovered()) {
7600 			// the vnode is covered now
7601 			status = B_BUSY;
7602 			rw_lock_write_unlock(&sVnodeLock);
7603 			goto err4;
7604 		}
7605 
7606 		mount->root_vnode->covers = coveredNode;
7607 		mount->root_vnode->SetCovering(true);
7608 
7609 		coveredNode->covered_by = mount->root_vnode;
7610 		coveredNode->SetCovered(true);
7611 	}
7612 	rw_lock_write_unlock(&sVnodeLock);
7613 
7614 	if (!sRoot) {
7615 		sRoot = mount->root_vnode;
7616 		mutex_lock(&sIOContextRootLock);
7617 		get_current_io_context(true)->root = sRoot;
7618 		mutex_unlock(&sIOContextRootLock);
7619 		inc_vnode_ref_count(sRoot);
7620 	}
7621 
7622 	// supply the partition (if any) with the mount cookie and mark it mounted
7623 	if (partition) {
7624 		partition->SetMountCookie(mount->volume->private_volume);
7625 		partition->SetVolumeID(mount->id);
7626 
7627 		// keep a partition reference as long as the partition is mounted
7628 		partitionRegistrar.Detach();
7629 		mount->partition = partition;
7630 		mount->owns_file_device = newlyCreatedFileDevice;
7631 		fileDeviceDeleter.id = -1;
7632 	}
7633 
7634 	notify_mount(mount->id,
7635 		coveredNode != NULL ? coveredNode->device : -1,
7636 		coveredNode ? coveredNode->id : -1);
7637 
7638 	return mount->id;
7639 
7640 err4:
7641 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7642 err3:
7643 	if (coveredNode != NULL)
7644 		put_vnode(coveredNode);
7645 err2:
7646 	rw_lock_write_lock(&sMountLock);
7647 	sMountsTable->Remove(mount);
7648 	rw_lock_write_unlock(&sMountLock);
7649 err1:
7650 	delete mount;
7651 
7652 	return status;
7653 }
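
// Illustrative sketch (not compiled): the loop in fs_mount() above chains
// layered volumes via the super_volume/sub_volume links, leaving
// mount->volume pointing at the last layer added; following super_volume
// from there leads back to the layer-0 file system. The helper below is
// hypothetical and merely demonstrates that invariant.
#if 0
static int32
count_volume_layers(struct fs_mount* mount)
{
	int32 layers = 0;
	for (fs_volume* volume = mount->volume; volume != NULL;
			volume = volume->super_volume) {
		layers++;
	}
	return layers;
}
#endif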
7654 
7655 
7656 static status_t
7657 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7658 {
7659 	struct fs_mount* mount;
7660 	status_t err;
7661 
7662 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7663 		mountID, kernel));
7664 
7665 	VnodePutter pathVnode;
7666 	if (path != NULL) {
7667 		err = path_to_vnode(path, true, pathVnode, NULL, kernel);
7668 		if (err != B_OK)
7669 			return B_ENTRY_NOT_FOUND;
7670 	}
7671 
7672 	RecursiveLocker mountOpLocker(sMountOpLock);
7673 	ReadLocker mountLocker(sMountLock);
7674 
7675 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7676 	if (mount == NULL) {
7677 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7678 			pathVnode.Get());
7679 	}
7680 
7681 	mountLocker.Unlock();
7682 
7683 	if (path != NULL) {
7684 		if (mount->root_vnode != pathVnode.Get()) {
7685 			// not a mount point
7686 			return B_BAD_VALUE;
7687 		}
7688 
7689 		pathVnode.Unset();
7690 	}
7691 
7692 	// if the volume is associated with a partition, lock the device of the
7693 	// partition as long as we are unmounting
7694 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7695 	KPartition* partition = mount->partition;
7696 	KDiskDevice* diskDevice = NULL;
7697 	if (partition != NULL) {
7698 		if (partition->Device() == NULL) {
7699 			dprintf("fs_unmount(): There is no device!\n");
7700 			return B_ERROR;
7701 		}
7702 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7703 		if (!diskDevice) {
7704 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7705 			return B_ERROR;
7706 		}
7707 	}
7708 	DeviceWriteLocker writeLocker(diskDevice, true);
7709 
7710 	// make sure that the partition is not busy
7711 	if (partition != NULL) {
7712 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7713 			dprintf("fs_unmount(): Partition is busy.\n");
7714 			return B_BUSY;
7715 		}
7716 	}
7717 
7718 	// grab the vnode write lock to keep anyone from creating
7719 	// a vnode while we're figuring out if we can continue
7720 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7721 
7722 	bool disconnectedDescriptors = false;
7723 
7724 	while (true) {
7725 		bool busy = false;
7726 
7727 		// cycle through the list of vnodes associated with this mount and
7728 		// make sure none of them is busy or still referenced
7729 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7730 		while (struct vnode* vnode = iterator.Next()) {
7731 			if (vnode->IsBusy()) {
7732 				dprintf("fs_unmount(): inode %" B_PRIdINO " is busy\n", vnode->id);
7733 				busy = true;
7734 				break;
7735 			}
7736 
7737 			// check the vnode's ref count -- subtract additional references for
7738 			// covering
7739 			int32 refCount = vnode->ref_count;
7740 			if (vnode->covers != NULL)
7741 				refCount--;
7742 			if (vnode->covered_by != NULL)
7743 				refCount--;
7744 
7745 			if (refCount != 0) {
7746 				dprintf("fs_unmount(): inode %" B_PRIdINO " is still referenced\n", vnode->id);
7747 				// there are still vnodes in use on this mount, so we cannot
7748 				// unmount yet
7749 				busy = true;
7750 				break;
7751 			}
7752 		}
7753 
7754 		if (!busy)
7755 			break;
7756 
7757 		if ((flags & B_FORCE_UNMOUNT) == 0)
7758 			return B_BUSY;
7759 
7760 		if (disconnectedDescriptors) {
7761 			// wait a bit until the last access is finished, and then try again
7762 			vnodesWriteLocker.Unlock();
7763 			snooze(100000);
7764 			// TODO: if there is some kind of bug that prevents the ref counts
7765 			// from getting back to zero, this will fall into an endless loop...
7766 			vnodesWriteLocker.Lock();
7767 			continue;
7768 		}
7769 
7770 		// the file system is still busy - but we're forced to unmount it,
7771 		// so let's disconnect all open file descriptors
7772 
7773 		mount->unmounting = true;
7774 			// prevent new vnodes from being created
7775 
7776 		vnodesWriteLocker.Unlock();
7777 
7778 		disconnect_mount_or_vnode_fds(mount, NULL);
7779 		disconnectedDescriptors = true;
7780 
7781 		vnodesWriteLocker.Lock();
7782 	}
7783 
7784 	// We can safely continue. Mark all of the vnodes busy and put this
7785 	// mount structure into unmounting state. Also undo the vnode
7786 	// covers/covered_by links.
7787 	mount->unmounting = true;
7788 
7789 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7790 	while (struct vnode* vnode = iterator.Next()) {
7791 		// Remove all covers/covered_by links from other mounts' nodes to this
7792 		// vnode and adjust the node ref count accordingly. We will release the
7793 		// references to the external vnodes below.
7794 		if (Vnode* coveredNode = vnode->covers) {
7795 			if (Vnode* coveringNode = vnode->covered_by) {
7796 				// We have both covered and covering vnodes, so just remove us
7797 				// from the chain.
7798 				coveredNode->covered_by = coveringNode;
7799 				coveringNode->covers = coveredNode;
7800 				vnode->ref_count -= 2;
7801 
7802 				vnode->covered_by = NULL;
7803 				vnode->covers = NULL;
7804 				vnode->SetCovering(false);
7805 				vnode->SetCovered(false);
7806 			} else {
7807 				// We only have a covered vnode. Remove its link to us.
7808 				coveredNode->covered_by = NULL;
7809 				coveredNode->SetCovered(false);
7810 				vnode->ref_count--;
7811 
7812 				// If the other node is an external vnode, we keep its link
7813 				// around so we can put the reference later on. Otherwise
7814 				// we get rid of it right now.
7815 				if (coveredNode->mount == mount) {
7816 					vnode->covers = NULL;
7817 					coveredNode->ref_count--;
7818 				}
7819 			}
7820 		} else if (Vnode* coveringNode = vnode->covered_by) {
7821 			// We only have a covering vnode. Remove its link to us.
7822 			coveringNode->covers = NULL;
7823 			coveringNode->SetCovering(false);
7824 			vnode->ref_count--;
7825 
7826 			// If the other node is an external vnode, we keep its link
7827 			// around so we can put the reference later on. Otherwise
7828 			// we get rid of it right now.
7829 			if (coveringNode->mount == mount) {
7830 				vnode->covered_by = NULL;
7831 				coveringNode->ref_count--;
7832 			}
7833 		}
7834 
7835 		vnode->SetBusy(true);
7836 		vnode_to_be_freed(vnode);
7837 	}
7838 
7839 	vnodesWriteLocker.Unlock();
7840 
7841 	// Free all vnodes associated with this mount.
7842 	// They will be removed from the mount list by free_vnode(), so
7843 	// we don't have to do that ourselves.
7844 	while (struct vnode* vnode = mount->vnodes.Head()) {
7845 		// Put the references to external covered/covering vnodes we kept above.
7846 		if (Vnode* coveredNode = vnode->covers)
7847 			put_vnode(coveredNode);
7848 		if (Vnode* coveringNode = vnode->covered_by)
7849 			put_vnode(coveringNode);
7850 
7851 		free_vnode(vnode, false);
7852 	}
7853 
7854 	// remove the mount structure from the hash table
7855 	rw_lock_write_lock(&sMountLock);
7856 	sMountsTable->Remove(mount);
7857 	rw_lock_write_unlock(&sMountLock);
7858 
7859 	mountOpLocker.Unlock();
7860 
7861 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7862 	notify_unmount(mount->id);
7863 
7864 	// dereference the partition and mark it unmounted
7865 	if (partition) {
7866 		partition->SetVolumeID(-1);
7867 		partition->SetMountCookie(NULL);
7868 
7869 		if (mount->owns_file_device)
7870 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7871 		partition->Unregister();
7872 	}
7873 
7874 	delete mount;
7875 	return B_OK;
7876 }
7877 
7878 
7879 static status_t
7880 fs_sync(dev_t device)
7881 {
7882 	struct fs_mount* mount;
7883 	status_t status = get_mount(device, &mount);
7884 	if (status != B_OK)
7885 		return status;
7886 
7887 	struct vnode marker;
7888 	memset(&marker, 0, sizeof(marker));
7889 	marker.SetBusy(true);
7890 	marker.SetRemoved(true);
7891 
7892 	// First, synchronize all file caches
7893 
7894 	while (true) {
7895 		WriteLocker locker(sVnodeLock);
7896 			// Note: That's the easy way, which is probably OK for sync(),
7897 			// since it's a relatively rare call and doesn't need to allow for
7898 			// a lot of concurrency. Using a read lock would be possible, but
7899 			// also more involved, since we would have to lock the individual
7900 			// nodes and take care of the locking order, which we might not
7901 			// want to do while holding fs_mount::lock.
7902 
7903 		// synchronize access to vnode list
7904 		mutex_lock(&mount->lock);
7905 
7906 		struct vnode* vnode;
7907 		if (!marker.IsRemoved()) {
7908 			vnode = mount->vnodes.GetNext(&marker);
7909 			mount->vnodes.Remove(&marker);
7910 			marker.SetRemoved(true);
7911 		} else
7912 			vnode = mount->vnodes.First();
7913 
7914 		while (vnode != NULL && (vnode->cache == NULL
7915 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7916 			// TODO: we could track writes (and writable mapped vnodes)
7917 			//	and have a simple flag that we could test for here
7918 			vnode = mount->vnodes.GetNext(vnode);
7919 		}
7920 
7921 		if (vnode != NULL) {
7922 			// insert marker vnode again
7923 			mount->vnodes.InsertBefore(mount->vnodes.GetNext(vnode), &marker);
7924 			marker.SetRemoved(false);
7925 		}
7926 
7927 		mutex_unlock(&mount->lock);
7928 
7929 		if (vnode == NULL)
7930 			break;
7931 
7932 		vnode = lookup_vnode(mount->id, vnode->id);
7933 		if (vnode == NULL || vnode->IsBusy())
7934 			continue;
7935 
7936 		if (vnode->ref_count == 0) {
7937 			// this vnode has been unused before
7938 			vnode_used(vnode);
7939 		}
7940 		inc_vnode_ref_count(vnode);
7941 
7942 		locker.Unlock();
7943 
7944 		if (vnode->cache != NULL && !vnode->IsRemoved())
7945 			vnode->cache->WriteModified();
7946 
7947 		put_vnode(vnode);
7948 	}
7949 
7950 	// Let the file systems do their synchronizing work
7951 	if (HAS_FS_MOUNT_CALL(mount, sync))
7952 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7953 
7954 	// Finally, flush the underlying device's write cache (if possible).
7955 	if (mount->partition != NULL && mount->partition->Device() != NULL)
7956 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
7957 
7958 	put_mount(mount);
7959 	return status;
7960 }
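
// Minimal sketch (not compiled) of the marker technique fs_sync() uses
// above: a dummy element remembers the position in the list so that the
// lock can be dropped while one element is being processed, and the
// iteration can be resumed safely afterwards, even if the list changed in
// the meantime. The Item type and the process() call are hypothetical.
#if 0
struct Item : DoublyLinkedListLinkImpl<Item> {
};

static void
process_all(DoublyLinkedList<Item>& list, mutex& lock)
{
	Item marker;

	mutex_lock(&lock);
	Item* item = list.First();
	while (item != NULL) {
		// remember our position behind the current element
		list.InsertAfter(item, &marker);
		mutex_unlock(&lock);

		process(item);
			// stand-in for the real work; may block -- we don't hold
			// the lock here

		mutex_lock(&lock);
		item = list.GetNext(&marker);
		list.Remove(&marker);
	}
	mutex_unlock(&lock);
}
#endif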
7961 
7962 
7963 static status_t
7964 fs_read_info(dev_t device, struct fs_info* info)
7965 {
7966 	struct fs_mount* mount;
7967 	status_t status = get_mount(device, &mount);
7968 	if (status != B_OK)
7969 		return status;
7970 
7971 	memset(info, 0, sizeof(struct fs_info));
7972 
7973 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7974 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7975 
7976 	// fill in info the file system doesn't (have to) know about
7977 	if (status == B_OK) {
7978 		info->dev = mount->id;
7979 		info->root = mount->root_vnode->id;
7980 
7981 		fs_volume* volume = mount->volume;
7982 		while (volume->super_volume != NULL)
7983 			volume = volume->super_volume;
7984 
7985 		strlcpy(info->fsh_name, volume->file_system_name,
7986 			sizeof(info->fsh_name));
7987 		if (mount->device_name != NULL) {
7988 			strlcpy(info->device_name, mount->device_name,
7989 				sizeof(info->device_name));
7990 		}
7991 	}
7992 
7993 	// even if the call is not supported by the file system, we can still
7994 	// return the parts that we filled in ourselves
7995 
7996 	put_mount(mount);
7997 	return status;
7998 }
7999 
8000 
8001 static status_t
8002 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8003 {
8004 	struct fs_mount* mount;
8005 	status_t status = get_mount(device, &mount);
8006 	if (status != B_OK)
8007 		return status;
8008 
8009 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8010 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8011 	else
8012 		status = B_READ_ONLY_DEVICE;
8013 
8014 	put_mount(mount);
8015 	return status;
8016 }
8017 
8018 
8019 static dev_t
8020 fs_next_device(int32* _cookie)
8021 {
8022 	struct fs_mount* mount = NULL;
8023 	dev_t device = *_cookie;
8024 
8025 	rw_lock_read_lock(&sMountLock);
8026 
8027 	// Since device IDs are assigned sequentially, this algorithm
8028 	// works well enough. It makes sure that the device list
8029 	// returned is sorted, and that no device is skipped when an
8030 	// already visited device got unmounted.
8031 
8032 	while (device < sNextMountID) {
8033 		mount = find_mount(device++);
8034 		if (mount != NULL && mount->volume->private_volume != NULL)
8035 			break;
8036 	}
8037 
8038 	*_cookie = device;
8039 
8040 	if (mount != NULL)
8041 		device = mount->id;
8042 	else
8043 		device = B_BAD_VALUE;
8044 
8045 	rw_lock_read_unlock(&sMountLock);
8046 
8047 	return device;
8048 }
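
// Sketch (not compiled): enumerating all mounted volumes with the
// sequential-cookie scheme implemented above; this mirrors what
// _kern_sync() below does with next_dev(). The helper name is
// hypothetical.
#if 0
static void
list_mounted_volumes()
{
	int32 cookie = 0;
	dev_t device;
	while ((device = _kern_next_device(&cookie)) >= 0)
		dprintf("volume %" B_PRIdDEV " is mounted\n", device);
}
#endif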
8049 
8050 
8051 ssize_t
8052 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8053 	void *buffer, size_t readBytes)
8054 {
8055 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8056 	if (attrFD < 0)
8057 		return attrFD;
8058 
8059 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8060 
8061 	_kern_close(attrFD);
8062 
8063 	return bytesRead;
8064 }
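
// Sketch (not compiled): reading a node's MIME type attribute through
// fs_read_attr(). The attribute name, type constant, and buffer size are
// illustrative only.
#if 0
static void
print_mime_type(int fd)
{
	char buffer[256];
	ssize_t length = fs_read_attr(fd, "BEOS:TYPE", B_STRING_TYPE, 0, buffer,
		sizeof(buffer) - 1);
	if (length >= 0) {
		buffer[length] = '\0';
		dprintf("MIME type: %s\n", buffer);
	}
}
#endif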
8065 
8066 
8067 static status_t
8068 get_cwd(char* buffer, size_t size, bool kernel)
8069 {
8070 	// Get current working directory from io context
8071 	struct io_context* context = get_current_io_context(kernel);
8072 	status_t status;
8073 
8074 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
8075 
8076 	mutex_lock(&context->io_mutex);
8077 
8078 	struct vnode* vnode = context->cwd;
8079 	if (vnode)
8080 		inc_vnode_ref_count(vnode);
8081 
8082 	mutex_unlock(&context->io_mutex);
8083 
8084 	if (vnode) {
8085 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8086 		put_vnode(vnode);
8087 	} else
8088 		status = B_ERROR;
8089 
8090 	return status;
8091 }
8092 
8093 
8094 static status_t
8095 set_cwd(int fd, char* path, bool kernel)
8096 {
8097 	struct io_context* context;
8098 	struct vnode* oldDirectory;
8099 
8100 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8101 
8102 	// Get vnode for passed path, and bail if it failed
8103 	VnodePutter vnode;
8104 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
8105 	if (status < 0)
8106 		return status;
8107 
8108 	if (!S_ISDIR(vnode->Type())) {
8109 		// nope, can't cwd to here
8110 		return B_NOT_A_DIRECTORY;
8111 	}
8112 
8113 	// We need to have the permission to enter the directory, too
8114 	if (HAS_FS_CALL(vnode, access)) {
8115 		status = FS_CALL(vnode.Get(), access, X_OK);
8116 		if (status != B_OK)
8117 			return status;
8118 	}
8119 
8120 	// Get current io context and lock
8121 	context = get_current_io_context(kernel);
8122 	mutex_lock(&context->io_mutex);
8123 
8124 	// save the old current working directory first
8125 	oldDirectory = context->cwd;
8126 	context->cwd = vnode.Detach();
8127 
8128 	mutex_unlock(&context->io_mutex);
8129 
8130 	if (oldDirectory)
8131 		put_vnode(oldDirectory);
8132 
8133 	return B_NO_ERROR;
8134 }
8135 
8136 
8137 static status_t
8138 user_copy_name(char* to, const char* from, size_t length)
8139 {
8140 	ssize_t len = user_strlcpy(to, from, length);
8141 	if (len < 0)
8142 		return len;
8143 	if (len >= (ssize_t)length)
8144 		return B_NAME_TOO_LONG;
8145 	return B_OK;
8146 }
8147 
8148 
8149 //	#pragma mark - kernel mirrored syscalls
8150 
8151 
8152 dev_t
8153 _kern_mount(const char* path, const char* device, const char* fsName,
8154 	uint32 flags, const char* args, size_t argsLength)
8155 {
8156 	KPath pathBuffer(path);
8157 	if (pathBuffer.InitCheck() != B_OK)
8158 		return B_NO_MEMORY;
8159 
8160 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8161 }
8162 
8163 
8164 status_t
8165 _kern_unmount(const char* path, uint32 flags)
8166 {
8167 	KPath pathBuffer(path);
8168 	if (pathBuffer.InitCheck() != B_OK)
8169 		return B_NO_MEMORY;
8170 
8171 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8172 }
8173 
8174 
8175 status_t
8176 _kern_read_fs_info(dev_t device, struct fs_info* info)
8177 {
8178 	if (info == NULL)
8179 		return B_BAD_VALUE;
8180 
8181 	return fs_read_info(device, info);
8182 }
8183 
8184 
8185 status_t
8186 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8187 {
8188 	if (info == NULL)
8189 		return B_BAD_VALUE;
8190 
8191 	return fs_write_info(device, info, mask);
8192 }
8193 
8194 
8195 status_t
8196 _kern_sync(void)
8197 {
8198 	// Note: _kern_sync() is also called from _user_sync()
8199 	int32 cookie = 0;
8200 	dev_t device;
8201 	while ((device = next_dev(&cookie)) >= 0) {
8202 		status_t status = fs_sync(device);
8203 		if (status != B_OK && status != B_BAD_VALUE) {
8204 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8205 				strerror(status));
8206 		}
8207 	}
8208 
8209 	return B_OK;
8210 }
8211 
8212 
8213 dev_t
8214 _kern_next_device(int32* _cookie)
8215 {
8216 	return fs_next_device(_cookie);
8217 }
8218 
8219 
8220 status_t
8221 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8222 	size_t infoSize)
8223 {
8224 	if (infoSize != sizeof(fd_info))
8225 		return B_BAD_VALUE;
8226 
8227 	// get the team
8228 	Team* team = Team::Get(teamID);
8229 	if (team == NULL)
8230 		return B_BAD_TEAM_ID;
8231 	BReference<Team> teamReference(team, true);
8232 
8233 	// now that we have a team reference, its I/O context won't go away
8234 	io_context* context = team->io_context;
8235 	MutexLocker contextLocker(context->io_mutex);
8236 
8237 	uint32 slot = *_cookie;
8238 
8239 	struct file_descriptor* descriptor;
8240 	while (slot < context->table_size
8241 		&& (descriptor = context->fds[slot]) == NULL) {
8242 		slot++;
8243 	}
8244 
8245 	if (slot >= context->table_size)
8246 		return B_ENTRY_NOT_FOUND;
8247 
8248 	info->number = slot;
8249 	info->open_mode = descriptor->open_mode;
8250 
8251 	struct vnode* vnode = fd_vnode(descriptor);
8252 	if (vnode != NULL) {
8253 		info->device = vnode->device;
8254 		info->node = vnode->id;
8255 	} else if (descriptor->u.mount != NULL) {
8256 		info->device = descriptor->u.mount->id;
8257 		info->node = -1;
8258 	}
8259 
8260 	*_cookie = slot + 1;
8261 	return B_OK;
8262 }
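
// Sketch (not compiled): walking all open descriptors of a team with the
// cookie protocol implemented above. The helper name is hypothetical.
#if 0
static void
dump_team_fds(team_id team)
{
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(team, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %" B_PRId32 ": device %" B_PRIdDEV ", node %" B_PRIdINO
			"\n", info.number, info.device, info.node);
	}
}
#endif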
8263 
8264 
8265 int
8266 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8267 	int perms)
8268 {
8269 	if ((openMode & O_CREAT) != 0) {
8270 		return file_create_entry_ref(device, inode, name, openMode, perms,
8271 			true);
8272 	}
8273 
8274 	return file_open_entry_ref(device, inode, name, openMode, true);
8275 }
8276 
8277 
8278 /*!	\brief Opens a node specified by a FD + path pair.
8279 
8280 	At least one of \a fd and \a path must be specified.
8281 	If only \a fd is given, the function opens the node identified by this
8282 	FD. If only a path is given, this path is opened. If both are given and
8283 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8284 	of the directory (!) identified by \a fd.
8285 
8286 	\param fd The FD. May be < 0.
8287 	\param path The absolute or relative path. May be \c NULL.
8288 	\param openMode The open mode.
8289 	\return A FD referring to the newly opened node, or an error code,
8290 			if an error occurs.
8291 */
8292 int
8293 _kern_open(int fd, const char* path, int openMode, int perms)
8294 {
8295 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8296 	if (pathBuffer.InitCheck() != B_OK)
8297 		return B_NO_MEMORY;
8298 
8299 	if ((openMode & O_CREAT) != 0)
8300 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8301 
8302 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8303 }
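
// Sketch (not compiled): the three ways the FD + path convention documented
// above can address a node. The paths are illustrative only.
#if 0
static void
open_examples()
{
	int dir = _kern_open_dir(-1, "/boot/home");

	int fd1 = _kern_open(dir, "todo.txt", O_RDONLY, 0);
		// relative path: resolved against the directory "dir" refers to
	int fd2 = _kern_open(dir, "/boot/home/todo.txt", O_RDONLY, 0);
		// absolute path: "dir" is ignored
	int fd3 = _kern_open(fd1, NULL, O_RDONLY, 0);
		// no path: opens the node fd1 itself refers to
}
#endif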
8304 
8305 
8306 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8307 
8308 	The supplied name may be \c NULL, in which case the directory identified
8309 	by \a device and \a inode will be opened. Otherwise \a device and
8310 	\a inode identify the parent directory of the directory to be opened
8311 	and \a name its entry name.
8312 
8313 	\param device If \a name is specified the ID of the device the parent
8314 		   directory of the directory to be opened resides on, otherwise
8315 		   the device of the directory itself.
8316 	\param inode If \a name is specified the node ID of the parent
8317 		   directory of the directory to be opened, otherwise node ID of the
8318 		   directory itself.
8319 	\param name The entry name of the directory to be opened. If \c NULL,
8320 		   the \a device + \a inode pair identify the node to be opened.
8321 	\return The FD of the newly opened directory or an error code, if
8322 			something went wrong.
8323 */
8324 int
8325 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8326 {
8327 	return dir_open_entry_ref(device, inode, name, true);
8328 }
8329 
8330 
8331 /*!	\brief Opens a directory specified by a FD + path pair.
8332 
8333 	At least one of \a fd and \a path must be specified.
8334 	If only \a fd is given, the function opens the directory identified by this
8335 	FD. If only a path is given, this path is opened. If both are given and
8336 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8337 	of the directory (!) identified by \a fd.
8338 
8339 	\param fd The FD. May be < 0.
8340 	\param path The absolute or relative path. May be \c NULL.
8341 	\return A FD referring to the newly opened directory, or an error code,
8342 			if an error occurs.
8343 */
8344 int
8345 _kern_open_dir(int fd, const char* path)
8346 {
8347 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8348 	if (pathBuffer.InitCheck() != B_OK)
8349 		return B_NO_MEMORY;
8350 
8351 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8352 }
8353 
8354 
8355 status_t
8356 _kern_fcntl(int fd, int op, size_t argument)
8357 {
8358 	return common_fcntl(fd, op, argument, true);
8359 }
8360 
8361 
8362 status_t
8363 _kern_fsync(int fd)
8364 {
8365 	return common_sync(fd, true);
8366 }
8367 
8368 
8369 status_t
8370 _kern_lock_node(int fd)
8371 {
8372 	return common_lock_node(fd, true);
8373 }
8374 
8375 
8376 status_t
8377 _kern_unlock_node(int fd)
8378 {
8379 	return common_unlock_node(fd, true);
8380 }
8381 
8382 
8383 status_t
8384 _kern_preallocate(int fd, off_t offset, off_t length)
8385 {
8386 	return common_preallocate(fd, offset, length, true);
8387 }
8388 
8389 
8390 status_t
8391 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8392 	int perms)
8393 {
8394 	return dir_create_entry_ref(device, inode, name, perms, true);
8395 }
8396 
8397 
8398 /*!	\brief Creates a directory specified by a FD + path pair.
8399 
8400 	\a path must always be specified (it contains the name of the new directory
8401 	at least). If only a path is given, this path identifies the location at
8402 	which the directory shall be created. If both \a fd and \a path are given
8403 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8404 	of the directory (!) identified by \a fd.
8405 
8406 	\param fd The FD. May be < 0.
8407 	\param path The absolute or relative path. Must not be \c NULL.
8408 	\param perms The access permissions the new directory shall have.
8409 	\return \c B_OK, if the directory has been created successfully, another
8410 			error code otherwise.
8411 */
8412 status_t
8413 _kern_create_dir(int fd, const char* path, int perms)
8414 {
8415 	KPath pathBuffer(path, KPath::DEFAULT);
8416 	if (pathBuffer.InitCheck() != B_OK)
8417 		return B_NO_MEMORY;
8418 
8419 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8420 }
8421 
8422 
8423 status_t
8424 _kern_remove_dir(int fd, const char* path)
8425 {
8426 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8427 	if (pathBuffer.InitCheck() != B_OK)
8428 		return B_NO_MEMORY;
8429 
8430 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8431 }
8432 
8433 
8434 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8435 
8436 	At least one of \a fd and \a path must be specified.
8437 	If only \a fd is given, the symlink to be read is the node identified by
8438 	this FD. If only a path is given, this path identifies the
8439 	symlink to be read. If both are given and the path is absolute, \a fd is
8440 	ignored; a relative path is reckoned off of the directory (!) identified
8441 	by \a fd.
8442 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8443 	will still be updated to reflect the required buffer size.
8444 
8445 	\param fd The FD. May be < 0.
8446 	\param path The absolute or relative path. May be \c NULL.
8447 	\param buffer The buffer into which the contents of the symlink shall be
8448 		   written.
8449 	\param _bufferSize A pointer to the size of the supplied buffer.
8450 	\return \c B_OK on success, or an appropriate error code otherwise.
8451 */
8452 status_t
8453 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8454 {
8455 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8456 	if (pathBuffer.InitCheck() != B_OK)
8457 		return B_NO_MEMORY;
8458 
8459 	return common_read_link(fd, pathBuffer.LockBuffer(),
8460 		buffer, _bufferSize, true);
8461 }
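
// Sketch (not compiled): handling the B_BUFFER_OVERFLOW protocol described
// above -- on overflow, the size pointer has already been updated to the
// length actually required. The path is illustrative only.
#if 0
static status_t
read_link_example()
{
	char buffer[16];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(-1, "/boot/home/link", buffer, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// "size" now holds the required length; retry with a larger buffer
	}
	return status;
}
#endif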
8462 
8463 
8464 /*!	\brief Creates a symlink specified by a FD + path pair.
8465 
8466 	\a path must always be specified (it contains the name of the new symlink
8467 	at least). If only a path is given, this path identifies the location at
8468 	which the symlink shall be created. If both \a fd and \a path are given and
8469 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8470 	of the directory (!) identified by \a fd.
8471 
8472 	\param fd The FD. May be < 0.
8473 	\param path The new symlink's path. Must not be \c NULL.
8474 	\param toPath The path to point to; \a mode its access permissions.
8475 	\return \c B_OK, if the symlink has been created successfully, another
8476 			error code otherwise.
8477 */
8478 status_t
8479 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8480 {
8481 	KPath pathBuffer(path);
8482 	if (pathBuffer.InitCheck() != B_OK)
8483 		return B_NO_MEMORY;
8484 
8485 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8486 		toPath, mode, true);
8487 }
8488 
8489 
8490 status_t
8491 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8492 	bool traverseLeafLink)
8493 {
8494 	KPath pathBuffer(path);
8495 	KPath toPathBuffer(toPath);
8496 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8497 		return B_NO_MEMORY;
8498 
8499 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8500 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8501 }
8502 
8503 
8504 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8505 
8506 	\a path must always be specified (it contains at least the name of the entry
8507 	to be deleted). If only a path is given, this path identifies the entry
8508 	directly. If both \a fd and \a path are given and the path is absolute,
8509 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8510 	identified by \a fd.
8511 
8512 	\param fd The FD. May be < 0.
8513 	\param path The absolute or relative path. Must not be \c NULL.
8514 	\return \c B_OK, if the entry has been removed successfully, another
8515 			error code otherwise.
8516 */
8517 status_t
8518 _kern_unlink(int fd, const char* path)
8519 {
8520 	KPath pathBuffer(path);
8521 	if (pathBuffer.InitCheck() != B_OK)
8522 		return B_NO_MEMORY;
8523 
8524 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8525 }
8526 
8527 
8528 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8529 		   by another FD + path pair.
8530 
8531 	\a oldPath and \a newPath must always be specified (they contain at least
8532 	the name of the entry). If only a path is given, this path identifies the
8533 	entry directly. If both a FD and a path are given and the path is absolute,
8534 	the FD is ignored; a relative path is reckoned off of the directory (!)
8535 	identified by the respective FD.
8536 
8537 	\param oldFD The FD of the old location. May be < 0.
8538 	\param oldPath The absolute or relative path of the old location. Must not
8539 		   be \c NULL.
8540 	\param newFD The FD of the new location. May be < 0.
8541 	\param newPath The absolute or relative path of the new location. Must not
8542 		   be \c NULL.
8543 	\return \c B_OK, if the entry has been moved successfully, another
8544 			error code otherwise.
8545 */
8546 status_t
8547 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8548 {
8549 	KPath oldPathBuffer(oldPath);
8550 	KPath newPathBuffer(newPath);
8551 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8552 		return B_NO_MEMORY;
8553 
8554 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8555 		newFD, newPathBuffer.LockBuffer(), true);
8556 }
8557 
8558 
8559 status_t
8560 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8561 {
8562 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8563 	if (pathBuffer.InitCheck() != B_OK)
8564 		return B_NO_MEMORY;
8565 
8566 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8567 		true);
8568 }
8569 
8570 
8571 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8572 
8573 	If only \a fd is given, the stat operation associated with the type
8574 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8575 	given, this path identifies the entry for whose node to retrieve the
8576 	stat data. If both \a fd and \a path are given and the path is absolute,
8577 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8578 	identified by \a fd and specifies the entry whose stat data shall be
8579 	retrieved.
8580 
8581 	\param fd The FD. May be < 0.
8582 	\param path The absolute or relative path. May be \c NULL.
8583 	\param traverseLeafLink If \a path is given, \c true specifies that the
8584 		   function shall not stick to symlinks, but traverse them.
8585 	\param stat The buffer the stat data shall be written into.
8586 	\param statSize The size of the supplied stat buffer.
8587 	\return \c B_OK, if the stat data have been read successfully, another
8588 			error code otherwise.
8589 */
8590 status_t
8591 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8592 	struct stat* stat, size_t statSize)
8593 {
8594 	struct stat completeStat;
8595 	struct stat* originalStat = NULL;
8596 	status_t status;
8597 
8598 	if (statSize > sizeof(struct stat))
8599 		return B_BAD_VALUE;
8600 
8601 	// this supports different stat extensions
8602 	if (statSize < sizeof(struct stat)) {
8603 		originalStat = stat;
8604 		stat = &completeStat;
8605 	}
8606 
8607 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8608 
8609 	if (status == B_OK && originalStat != NULL)
8610 		memcpy(originalStat, stat, statSize);
8611 
8612 	return status;
8613 }
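
// Sketch (not compiled): the statSize parameter above exists for binary
// compatibility -- a caller compiled against an older, smaller struct stat
// passes its own sizeof and gets only that prefix copied back. The
// "old_stat" structure is hypothetical.
#if 0
static status_t
read_old_stat(int fd)
{
	struct old_stat oldStat;
	return _kern_read_stat(fd, NULL, false, (struct stat*)&oldStat,
		sizeof(oldStat));
}
#endif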
8614 
8615 
8616 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8617 
8618 	If only \a fd is given, the stat operation associated with the type
8619 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8620 	given, this path identifies the entry for whose node to write the
8621 	stat data. If both \a fd and \a path are given and the path is absolute,
8622 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8623 	identified by \a fd and specifies the entry whose stat data shall be
8624 	written.
8625 
8626 	\param fd The FD. May be < 0.
8627 	\param path The absolute or relative path. May be \c NULL.
8628 	\param traverseLeafLink If \a path is given, \c true specifies that the
8629 		   function shall not stick to symlinks, but traverse them.
8630 	\param stat The buffer containing the stat data to be written.
8631 	\param statSize The size of the supplied stat buffer.
8632 	\param statMask A mask specifying which parts of the stat data shall be
8633 		   written.
8634 	\return \c B_OK, if the stat data have been written successfully,
8635 			another error code otherwise.
8636 */
8637 status_t
8638 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8639 	const struct stat* stat, size_t statSize, int statMask)
8640 {
8641 	struct stat completeStat;
8642 
8643 	if (statSize > sizeof(struct stat))
8644 		return B_BAD_VALUE;
8645 
8646 	// this supports different stat extensions
8647 	if (statSize < sizeof(struct stat)) {
8648 		memset((uint8*)&completeStat + statSize, 0,
8649 			sizeof(struct stat) - statSize);
8650 		memcpy(&completeStat, stat, statSize);
8651 		stat = &completeStat;
8652 	}
8653 
8654 	status_t status;
8655 
8656 	if (path != NULL) {
8657 		// path given: write the stat of the node referred to by (fd, path)
8658 		KPath pathBuffer(path);
8659 		if (pathBuffer.InitCheck() != B_OK)
8660 			return B_NO_MEMORY;
8661 
8662 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8663 			traverseLeafLink, stat, statMask, true);
8664 	} else {
8665 		// no path given: get the FD and use the FD operation
8666 		FileDescriptorPutter descriptor
8667 			(get_fd(get_current_io_context(true), fd));
8668 		if (!descriptor.IsSet())
8669 			return B_FILE_ERROR;
8670 
8671 		if (descriptor->ops->fd_write_stat)
8672 			status = descriptor->ops->fd_write_stat(descriptor.Get(), stat, statMask);
8673 		else
8674 			status = B_UNSUPPORTED;
8675 	}
8676 
8677 	return status;
8678 }
8679 
8680 
8681 int
8682 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8683 {
8684 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8685 	if (pathBuffer.InitCheck() != B_OK)
8686 		return B_NO_MEMORY;
8687 
8688 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8689 }
8690 
8691 
8692 int
8693 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8694 	int openMode)
8695 {
8696 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8697 	if (pathBuffer.InitCheck() != B_OK)
8698 		return B_NO_MEMORY;
8699 
8700 	if ((openMode & O_CREAT) != 0) {
8701 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8702 			true);
8703 	}
8704 
8705 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8706 }
8707 
8708 
8709 status_t
8710 _kern_remove_attr(int fd, const char* name)
8711 {
8712 	return attr_remove(fd, name, true);
8713 }
8714 
8715 
8716 status_t
8717 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8718 	const char* toName)
8719 {
8720 	return attr_rename(fromFile, fromName, toFile, toName, true);
8721 }
8722 
8723 
8724 int
8725 _kern_open_index_dir(dev_t device)
8726 {
8727 	return index_dir_open(device, true);
8728 }
8729 
8730 
8731 status_t
8732 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8733 {
8734 	return index_create(device, name, type, flags, true);
8735 }
8736 
8737 
8738 status_t
8739 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8740 {
8741 	return index_name_read_stat(device, name, stat, true);
8742 }
8743 
8744 
8745 status_t
8746 _kern_remove_index(dev_t device, const char* name)
8747 {
8748 	return index_remove(device, name, true);
8749 }
8750 
8751 
8752 status_t
8753 _kern_getcwd(char* buffer, size_t size)
8754 {
8755 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8756 
8757 	// Call vfs to get current working directory
8758 	return get_cwd(buffer, size, true);
8759 }
8760 
8761 
8762 status_t
8763 _kern_setcwd(int fd, const char* path)
8764 {
8765 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8766 	if (pathBuffer.InitCheck() != B_OK)
8767 		return B_NO_MEMORY;
8768 
8769 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8770 }
8771 
8772 
8773 //	#pragma mark - userland syscalls
8774 
8775 
8776 dev_t
8777 _user_mount(const char* userPath, const char* userDevice,
8778 	const char* userFileSystem, uint32 flags, const char* userArgs,
8779 	size_t argsLength)
8780 {
8781 	char fileSystem[B_FILE_NAME_LENGTH];
8782 	KPath path, device;
8783 	char* args = NULL;
8784 	status_t status;
8785 
8786 	if (!IS_USER_ADDRESS(userPath))
8787 		return B_BAD_ADDRESS;
8788 
8789 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8790 		return B_NO_MEMORY;
8791 
8792 	status = user_copy_name(path.LockBuffer(), userPath,
8793 		B_PATH_NAME_LENGTH);
8794 	if (status != B_OK)
8795 		return status;
8796 	path.UnlockBuffer();
8797 
8798 	if (userFileSystem != NULL) {
8799 		if (!IS_USER_ADDRESS(userFileSystem))
8800 			return B_BAD_ADDRESS;
8801 
8802 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8803 		if (status != B_OK)
8804 			return status;
8805 	}
8806 
8807 	if (userDevice != NULL) {
8808 		if (!IS_USER_ADDRESS(userDevice))
8809 			return B_BAD_ADDRESS;
8810 
8811 		status = user_copy_name(device.LockBuffer(), userDevice,
8812 			B_PATH_NAME_LENGTH);
8813 		if (status != B_OK)
8814 			return status;
8815 		device.UnlockBuffer();
8816 	}
8817 
8818 	if (userArgs != NULL && argsLength > 0) {
8819 		if (!IS_USER_ADDRESS(userArgs))
8820 			return B_BAD_ADDRESS;
8821 
8822 		// this is a safety restriction
8823 		if (argsLength >= 65536)
8824 			return B_NAME_TOO_LONG;
8825 
8826 		args = (char*)malloc(argsLength + 1);
8827 		if (args == NULL)
8828 			return B_NO_MEMORY;
8829 
8830 		status = user_copy_name(args, userArgs, argsLength + 1);
8831 		if (status != B_OK) {
8832 			free(args);
8833 			return status;
8834 		}
8835 	}
8836 
8837 	status = fs_mount(path.LockBuffer(),
8838 		userDevice != NULL ? device.Path() : NULL,
8839 		userFileSystem ? fileSystem : NULL, flags, args, false);
8840 
8841 	free(args);
8842 	return status;
8843 }
8844 
8845 
8846 status_t
8847 _user_unmount(const char* userPath, uint32 flags)
8848 {
8849 	if (!IS_USER_ADDRESS(userPath))
8850 		return B_BAD_ADDRESS;
8851 
8852 	KPath pathBuffer;
8853 	if (pathBuffer.InitCheck() != B_OK)
8854 		return B_NO_MEMORY;
8855 
8856 	char* path = pathBuffer.LockBuffer();
8857 
8858 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8859 	if (status != B_OK)
8860 		return status;
8861 
8862 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8863 }
8864 
8865 
8866 status_t
8867 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8868 {
8869 	struct fs_info info;
8870 	status_t status;
8871 
8872 	if (userInfo == NULL)
8873 		return B_BAD_VALUE;
8874 
8875 	if (!IS_USER_ADDRESS(userInfo))
8876 		return B_BAD_ADDRESS;
8877 
8878 	status = fs_read_info(device, &info);
8879 	if (status != B_OK)
8880 		return status;
8881 
8882 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8883 		return B_BAD_ADDRESS;
8884 
8885 	return B_OK;
8886 }
8887 
8888 
8889 status_t
8890 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8891 {
8892 	struct fs_info info;
8893 
8894 	if (userInfo == NULL)
8895 		return B_BAD_VALUE;
8896 
8897 	if (!IS_USER_ADDRESS(userInfo)
8898 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8899 		return B_BAD_ADDRESS;
8900 
8901 	return fs_write_info(device, &info, mask);
8902 }
8903 
8904 
8905 dev_t
8906 _user_next_device(int32* _userCookie)
8907 {
8908 	int32 cookie;
8909 	dev_t device;
8910 
8911 	if (!IS_USER_ADDRESS(_userCookie)
8912 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8913 		return B_BAD_ADDRESS;
8914 
8915 	device = fs_next_device(&cookie);
8916 
8917 	if (device >= B_OK) {
8918 		// update user cookie
8919 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8920 			return B_BAD_ADDRESS;
8921 	}
8922 
8923 	return device;
8924 }
8925 
8926 
8927 status_t
8928 _user_sync(void)
8929 {
8930 	return _kern_sync();
8931 }
8932 
8933 
8934 status_t
8935 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8936 	size_t infoSize)
8937 {
8938 	struct fd_info info;
8939 	uint32 cookie;
8940 
8941 	// only root can do this
8942 	if (geteuid() != 0)
8943 		return B_NOT_ALLOWED;
8944 
8945 	if (infoSize != sizeof(fd_info))
8946 		return B_BAD_VALUE;
8947 
8948 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8949 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8950 		return B_BAD_ADDRESS;
8951 
8952 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8953 	if (status != B_OK)
8954 		return status;
8955 
8956 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8957 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8958 		return B_BAD_ADDRESS;
8959 
8960 	return status;
8961 }
8962 
8963 
8964 status_t
8965 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8966 	char* userPath, size_t pathLength)
8967 {
8968 	if (!IS_USER_ADDRESS(userPath))
8969 		return B_BAD_ADDRESS;
8970 
8971 	KPath path;
8972 	if (path.InitCheck() != B_OK)
8973 		return B_NO_MEMORY;
8974 
8975 	// copy the leaf name onto the stack
8976 	char stackLeaf[B_FILE_NAME_LENGTH];
8977 	if (leaf != NULL) {
8978 		if (!IS_USER_ADDRESS(leaf))
8979 			return B_BAD_ADDRESS;
8980 
8981 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8982 		if (status != B_OK)
8983 			return status;
8984 
8985 		leaf = stackLeaf;
8986 	}
8987 
8988 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8989 		false, path.LockBuffer(), path.BufferSize());
8990 	if (status != B_OK)
8991 		return status;
8992 
8993 	path.UnlockBuffer();
8994 
8995 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8996 	if (length < 0)
8997 		return length;
8998 	if (length >= (int)pathLength)
8999 		return B_BUFFER_OVERFLOW;
9000 
9001 	return B_OK;
9002 }
9003 
9004 
9005 status_t
9006 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9007 {
9008 	if (userPath == NULL || buffer == NULL)
9009 		return B_BAD_VALUE;
9010 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9011 		return B_BAD_ADDRESS;
9012 
9013 	// copy path from userland
9014 	KPath pathBuffer;
9015 	if (pathBuffer.InitCheck() != B_OK)
9016 		return B_NO_MEMORY;
9017 	char* path = pathBuffer.LockBuffer();
9018 
9019 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9020 	if (status != B_OK)
9021 		return status;
9022 
9023 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9024 		false);
9025 	if (error != B_OK)
9026 		return error;
9027 
9028 	// copy back to userland
9029 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9030 	if (len < 0)
9031 		return len;
9032 	if (len >= B_PATH_NAME_LENGTH)
9033 		return B_BUFFER_OVERFLOW;
9034 
9035 	return B_OK;
9036 }
9037 
9038 
9039 int
9040 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9041 	int openMode, int perms)
9042 {
9043 	char name[B_FILE_NAME_LENGTH];
9044 
9045 	if (userName == NULL || device < 0 || inode < 0)
9046 		return B_BAD_VALUE;
9047 	if (!IS_USER_ADDRESS(userName))
9048 		return B_BAD_ADDRESS;
9049 	status_t status = user_copy_name(name, userName, sizeof(name));
9050 	if (status != B_OK)
9051 		return status;
9052 
9053 	if ((openMode & O_CREAT) != 0) {
9054 		return file_create_entry_ref(device, inode, name, openMode, perms,
9055 			false);
9056 	}
9057 
9058 	return file_open_entry_ref(device, inode, name, openMode, false);
9059 }
9060 
9061 
9062 int
9063 _user_open(int fd, const char* userPath, int openMode, int perms)
9064 {
9065 	KPath path;
9066 	if (path.InitCheck() != B_OK)
9067 		return B_NO_MEMORY;
9068 
9069 	char* buffer = path.LockBuffer();
9070 
9071 	if (!IS_USER_ADDRESS(userPath))
9072 		return B_BAD_ADDRESS;
9073 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9074 	if (status != B_OK)
9075 		return status;
9076 
9077 	if ((openMode & O_CREAT) != 0)
9078 		return file_create(fd, buffer, openMode, perms, false);
9079 
9080 	return file_open(fd, buffer, openMode, false);
9081 }
9082 
9083 
9084 int
9085 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9086 {
9087 	if (userName != NULL) {
9088 		char name[B_FILE_NAME_LENGTH];
9089 
9090 		if (!IS_USER_ADDRESS(userName))
9091 			return B_BAD_ADDRESS;
9092 		status_t status = user_copy_name(name, userName, sizeof(name));
9093 		if (status != B_OK)
9094 			return status;
9095 
9096 		return dir_open_entry_ref(device, inode, name, false);
9097 	}
9098 	return dir_open_entry_ref(device, inode, NULL, false);
9099 }
9100 
9101 
9102 int
9103 _user_open_dir(int fd, const char* userPath)
9104 {
9105 	if (userPath == NULL)
9106 		return dir_open(fd, NULL, false);
9107 
9108 	KPath path;
9109 	if (path.InitCheck() != B_OK)
9110 		return B_NO_MEMORY;
9111 
9112 	char* buffer = path.LockBuffer();
9113 
9114 	if (!IS_USER_ADDRESS(userPath))
9115 		return B_BAD_ADDRESS;
9116 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9117 	if (status != B_OK)
9118 		return status;
9119 
9120 	return dir_open(fd, buffer, false);
9121 }
9122 
9123 
9124 /*!	\brief Opens a directory's parent directory and returns the entry name
9125 		   of the former.
9126 
9127 	Aside from the fact that it returns the directory's entry name, this
9128 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It
9129 	really is equivalent, if \a userName is \c NULL.
9130 
9131 	If a name buffer is supplied and the name does not fit the buffer, the
9132 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9133 
9134 	\param fd A FD referring to a directory.
9135 	\param userName Buffer the directory's entry name shall be written into.
9136 		   May be \c NULL.
9137 	\param nameLength Size of the name buffer.
9138 	\return The file descriptor of the opened parent directory, if everything
9139 			went fine, an error code otherwise.
9140 */
9141 int
9142 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9143 {
9144 	bool kernel = false;
9145 
9146 	if (userName && !IS_USER_ADDRESS(userName))
9147 		return B_BAD_ADDRESS;
9148 
9149 	// open the parent dir
9150 	int parentFD = dir_open(fd, (char*)"..", kernel);
9151 	if (parentFD < 0)
9152 		return parentFD;
9153 	FDCloser fdCloser(parentFD, kernel);
9154 
9155 	if (userName) {
9156 		// get the vnodes
9157 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9158 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9159 		VnodePutter parentVNodePutter(parentVNode);
9160 		VnodePutter dirVNodePutter(dirVNode);
9161 		if (!parentVNode || !dirVNode)
9162 			return B_FILE_ERROR;
9163 
9164 		// get the vnode name
9165 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9166 		struct dirent* buffer = (struct dirent*)_buffer;
9167 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9168 			sizeof(_buffer), get_current_io_context(false));
9169 		if (status != B_OK)
9170 			return status;
9171 
9172 		// copy the name to the userland buffer
9173 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9174 		if (len < 0)
9175 			return len;
9176 		if (len >= (int)nameLength)
9177 			return B_BUFFER_OVERFLOW;
9178 	}
9179 
9180 	return fdCloser.Detach();
9181 }
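
// Sketch (not compiled): what a caller gets back from the parent-dir
// syscall above. Real userland code reaches it through the syscall layer,
// and the name buffer would have to be a userland address; the direct call
// and the path here are purely illustrative.
#if 0
static void
parent_dir_example()
{
	char name[B_FILE_NAME_LENGTH];
	int dir = _kern_open_dir(-1, "/boot/home/mail");
	int parent = _user_open_parent_dir(dir, name, sizeof(name));
		// on success, "parent" refers to /boot/home and "name" contains
		// "mail"
}
#endif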
9182 
9183 
9184 status_t
9185 _user_fcntl(int fd, int op, size_t argument)
9186 {
9187 	status_t status = common_fcntl(fd, op, argument, false);
9188 	if (op == F_SETLKW)
9189 		syscall_restart_handle_post(status);
9190 
9191 	return status;
9192 }
9193 
9194 
9195 status_t
9196 _user_fsync(int fd)
9197 {
9198 	return common_sync(fd, false);
9199 }
9200 
9201 
9202 status_t
9203 _user_flock(int fd, int operation)
9204 {
9205 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9206 
9207 	// Check if the operation is valid
9208 	switch (operation & ~LOCK_NB) {
9209 		case LOCK_UN:
9210 		case LOCK_SH:
9211 		case LOCK_EX:
9212 			break;
9213 
9214 		default:
9215 			return B_BAD_VALUE;
9216 	}
9217 
9218 	struct vnode* vnode;
9219 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, false));
9220 	if (!descriptor.IsSet())
9221 		return B_FILE_ERROR;
9222 
9223 	if (descriptor->type != FDTYPE_FILE)
9224 		return B_BAD_VALUE;
9225 
9226 	struct flock flock;
9227 	flock.l_start = 0;
9228 	flock.l_len = OFF_MAX;
9229 	flock.l_whence = 0;
9230 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9231 
9232 	status_t status;
9233 	if ((operation & LOCK_UN) != 0) {
9234 		if (HAS_FS_CALL(vnode, release_lock))
9235 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9236 		else
9237 			status = release_advisory_lock(vnode, NULL, descriptor.Get(), &flock);
9238 	} else {
9239 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9240 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9241 				(operation & LOCK_NB) == 0);
9242 		} else {
9243 			status = acquire_advisory_lock(vnode, NULL, descriptor.Get(), &flock,
9244 				(operation & LOCK_NB) == 0);
9245 		}
9246 	}
9247 
9248 	syscall_restart_handle_post(status);
9249 
9250 	return status;
9251 }
9252 
9253 
9254 status_t
9255 _user_lock_node(int fd)
9256 {
9257 	return common_lock_node(fd, false);
9258 }
9259 
9260 
9261 status_t
9262 _user_unlock_node(int fd)
9263 {
9264 	return common_unlock_node(fd, false);
9265 }
9266 
9267 
9268 status_t
9269 _user_preallocate(int fd, off_t offset, off_t length)
9270 {
9271 	return common_preallocate(fd, offset, length, false);
9272 }
9273 
9274 
9275 status_t
9276 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9277 	int perms)
9278 {
9279 	char name[B_FILE_NAME_LENGTH];
9280 	status_t status;
9281 
9282 	if (!IS_USER_ADDRESS(userName))
9283 		return B_BAD_ADDRESS;
9284 
9285 	status = user_copy_name(name, userName, sizeof(name));
9286 	if (status != B_OK)
9287 		return status;
9288 
9289 	return dir_create_entry_ref(device, inode, name, perms, false);
9290 }
9291 
9292 
9293 status_t
9294 _user_create_dir(int fd, const char* userPath, int perms)
9295 {
9296 	KPath pathBuffer;
9297 	if (pathBuffer.InitCheck() != B_OK)
9298 		return B_NO_MEMORY;
9299 
9300 	char* path = pathBuffer.LockBuffer();
9301 
9302 	if (!IS_USER_ADDRESS(userPath))
9303 		return B_BAD_ADDRESS;
9304 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9305 	if (status != B_OK)
9306 		return status;
9307 
9308 	return dir_create(fd, path, perms, false);
9309 }
9310 
9311 
9312 status_t
9313 _user_remove_dir(int fd, const char* userPath)
9314 {
9315 	KPath pathBuffer;
9316 	if (pathBuffer.InitCheck() != B_OK)
9317 		return B_NO_MEMORY;
9318 
9319 	char* path = pathBuffer.LockBuffer();
9320 
9321 	if (userPath != NULL) {
9322 		if (!IS_USER_ADDRESS(userPath))
9323 			return B_BAD_ADDRESS;
9324 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9325 		if (status != B_OK)
9326 			return status;
9327 	}
9328 
9329 	return dir_remove(fd, userPath ? path : NULL, false);
9330 }
9331 
9332 
9333 status_t
9334 _user_read_link(int fd, const char* userPath, char* userBuffer,
9335 	size_t* userBufferSize)
9336 {
9337 	KPath pathBuffer, linkBuffer;
9338 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9339 		return B_NO_MEMORY;
9340 
9341 	size_t bufferSize;
9342 
9343 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9344 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9345 		return B_BAD_ADDRESS;
9346 
9347 	char* path = pathBuffer.LockBuffer();
9348 	char* buffer = linkBuffer.LockBuffer();
9349 
9350 	if (userPath) {
9351 		if (!IS_USER_ADDRESS(userPath))
9352 			return B_BAD_ADDRESS;
9353 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9354 		if (status != B_OK)
9355 			return status;
9356 
9357 		if (bufferSize > B_PATH_NAME_LENGTH)
9358 			bufferSize = B_PATH_NAME_LENGTH;
9359 	}
9360 
9361 	size_t newBufferSize = bufferSize;
9362 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9363 		&newBufferSize, false);
9364 
9365 	// we also update the bufferSize in case of errors
9366 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9367 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9368 		return B_BAD_ADDRESS;
9369 
9370 	if (status != B_OK)
9371 		return status;
9372 
9373 	bufferSize = min_c(newBufferSize, bufferSize);
9374 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9375 		return B_BAD_ADDRESS;
9376 
9377 	return B_OK;
9378 }
9379 
9380 
9381 status_t
9382 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9383 	int mode)
9384 {
9385 	KPath pathBuffer;
9386 	KPath toPathBuffer;
9387 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9388 		return B_NO_MEMORY;
9389 
9390 	char* path = pathBuffer.LockBuffer();
9391 	char* toPath = toPathBuffer.LockBuffer();
9392 
9393 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9394 		return B_BAD_ADDRESS;
9395 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9396 	if (status != B_OK)
9397 		return status;
9398 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9399 	if (status != B_OK)
9400 		return status;
9401 
9402 	return common_create_symlink(fd, path, toPath, mode, false);
9403 }
9404 
9405 
9406 status_t
9407 _user_create_link(int pathFD, const char* userPath, int toFD,
9408 	const char* userToPath, bool traverseLeafLink)
9409 {
9410 	KPath pathBuffer;
9411 	KPath toPathBuffer;
9412 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9413 		return B_NO_MEMORY;
9414 
9415 	char* path = pathBuffer.LockBuffer();
9416 	char* toPath = toPathBuffer.LockBuffer();
9417 
9418 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9419 		return B_BAD_ADDRESS;
9420 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9421 	if (status != B_OK)
9422 		return status;
9423 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9424 	if (status != B_OK)
9425 		return status;
9426 
9427 	status = check_path(toPath);
9428 	if (status != B_OK)
9429 		return status;
9430 
9431 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9432 		false);
9433 }
9434 
9435 
9436 status_t
9437 _user_unlink(int fd, const char* userPath)
9438 {
9439 	KPath pathBuffer;
9440 	if (pathBuffer.InitCheck() != B_OK)
9441 		return B_NO_MEMORY;
9442 
9443 	char* path = pathBuffer.LockBuffer();
9444 
9445 	if (!IS_USER_ADDRESS(userPath))
9446 		return B_BAD_ADDRESS;
9447 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9448 	if (status != B_OK)
9449 		return status;
9450 
9451 	return common_unlink(fd, path, false);
9452 }
9453 
9454 
9455 status_t
9456 _user_rename(int oldFD, const char* userOldPath, int newFD,
9457 	const char* userNewPath)
9458 {
9459 	KPath oldPathBuffer;
9460 	KPath newPathBuffer;
9461 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9462 		return B_NO_MEMORY;
9463 
9464 	char* oldPath = oldPathBuffer.LockBuffer();
9465 	char* newPath = newPathBuffer.LockBuffer();
9466 
9467 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9468 		return B_BAD_ADDRESS;
9469 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9470 	if (status != B_OK)
9471 		return status;
9472 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9473 	if (status != B_OK)
9474 		return status;
9475 
9476 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9477 }
9478 
9479 
9480 status_t
9481 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9482 {
9483 	KPath pathBuffer;
9484 	if (pathBuffer.InitCheck() != B_OK)
9485 		return B_NO_MEMORY;
9486 
9487 	char* path = pathBuffer.LockBuffer();
9488 
9489 	if (!IS_USER_ADDRESS(userPath))
9490 		return B_BAD_ADDRESS;
9491 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9492 	if (status != B_OK)
9493 		return status;
9494 
9495 	// split into directory vnode and filename path
9496 	char filename[B_FILE_NAME_LENGTH];
9497 	VnodePutter dir;
9498 	status = fd_and_path_to_dir_vnode(fd, path, dir, filename, false);
9499 	if (status != B_OK)
9500 		return status;
9501 
9502 	// the underlying FS needs to support creating FIFOs
9503 	if (!HAS_FS_CALL(dir, create_special_node))
9504 		return B_UNSUPPORTED;
9505 
9506 	// create the entry -- the FIFO sub node is set up automatically
9507 	fs_vnode superVnode;
9508 	ino_t nodeID;
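	// perms is masked to the permission bits; the node type is always S_IFIFO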
9509 	status = FS_CALL(dir.Get(), create_special_node, filename, NULL,
9510 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9511 
9512 	// create_special_node() acquired a reference for us that we don't need.
9513 	if (status == B_OK)
9514 		put_vnode(dir->mount->volume, nodeID);
9515 
9516 	return status;
9517 }
9518 
9519 
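// Backs the userland pipe() call: creates an unnamed FIFO on the root file
// system and opens one FD for each end.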
9520 status_t
9521 _user_create_pipe(int* userFDs)
9522 {
9523 	// rootfs should support creating FIFOs, but let's be sure
9524 	if (!HAS_FS_CALL(sRoot, create_special_node))
9525 		return B_UNSUPPORTED;
9526 
9527 	// create the node -- the FIFO sub node is set up automatically
9528 	fs_vnode superVnode;
9529 	ino_t nodeID;
9530 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9531 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9532 	if (status != B_OK)
9533 		return status;
9534 
9535 	// We've got one reference to the node and need another one.
9536 	struct vnode* vnode;
9537 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9538 	if (status != B_OK) {
9539 		// that should not happen
9540 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9541 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9542 		return status;
9543 	}
9544 
9545 	// Everything looks good so far. Open two FDs, one for reading and one
9546 	// for writing.
9547 	int fds[2];
9548 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9549 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9550 
9551 	FDCloser closer0(fds[0], false);
9552 	FDCloser closer1(fds[1], false);
9553 
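	// open_vnode() returns a negative error code on failure; keep the first
	// error, if any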
9554 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9555 
9556 	// copy FDs to userland
9557 	if (status == B_OK) {
9558 		if (!IS_USER_ADDRESS(userFDs)
9559 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9560 			status = B_BAD_ADDRESS;
9561 		}
9562 	}
9563 
9564 	// keep FDs, if everything went fine
9565 	if (status == B_OK) {
9566 		closer0.Detach();
9567 		closer1.Detach();
9568 	}
9569 
9570 	return status;
9571 }
9572 
9573 
9574 status_t
9575 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9576 {
9577 	KPath pathBuffer;
9578 	if (pathBuffer.InitCheck() != B_OK)
9579 		return B_NO_MEMORY;
9580 
9581 	char* path = pathBuffer.LockBuffer();
9582 
9583 	if (!IS_USER_ADDRESS(userPath))
9584 		return B_BAD_ADDRESS;
9585 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9586 	if (status != B_OK)
9587 		return status;
9588 
9589 	return common_access(fd, path, mode, effectiveUserGroup, false);
9590 }
9591 
9592 
9593 status_t
9594 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9595 	struct stat* userStat, size_t statSize)
9596 {
9597 	struct stat stat = {0};
9598 	status_t status;
9599 
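	// A statSize larger than the kernel's struct stat is refused; a smaller
	// one is allowed so that callers built against an older, smaller
	// struct stat keep working (only statSize bytes are copied back).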
9600 	if (statSize > sizeof(struct stat))
9601 		return B_BAD_VALUE;
9602 
9603 	if (!IS_USER_ADDRESS(userStat))
9604 		return B_BAD_ADDRESS;
9605 
9606 	if (userPath != NULL) {
9607 		// path given: get the stat of the node referred to by (fd, path)
9608 		if (!IS_USER_ADDRESS(userPath))
9609 			return B_BAD_ADDRESS;
9610 
9611 		KPath pathBuffer;
9612 		if (pathBuffer.InitCheck() != B_OK)
9613 			return B_NO_MEMORY;
9614 
9615 		char* path = pathBuffer.LockBuffer();
9616 
9617 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9618 		if (status != B_OK)
9619 			return status;
9620 
9621 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9622 	} else {
9623 		// no path given: get the FD and use the FD operation
9624 		FileDescriptorPutter descriptor(
9625 			get_fd(get_current_io_context(false), fd));
9626 		if (!descriptor.IsSet())
9627 			return B_FILE_ERROR;
9628 
9629 		if (descriptor->ops->fd_read_stat)
9630 			status = descriptor->ops->fd_read_stat(descriptor.Get(), &stat);
9631 		else
9632 			status = B_UNSUPPORTED;
9633 	}
9634 
9635 	if (status != B_OK)
9636 		return status;
9637 
9638 	return user_memcpy(userStat, &stat, statSize);
9639 }
9640 
9641 
9642 status_t
9643 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9644 	const struct stat* userStat, size_t statSize, int statMask)
9645 {
9646 	if (statSize > sizeof(struct stat))
9647 		return B_BAD_VALUE;
9648 
9649 	struct stat stat;
9650 
9651 	if (!IS_USER_ADDRESS(userStat)
9652 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9653 		return B_BAD_ADDRESS;
9654 
9655 	// zero the fields a smaller (older) struct stat does not provide
9656 	if (statSize < sizeof(struct stat))
9657 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9658 
9659 	status_t status;
9660 
9661 	if (userPath != NULL) {
9662 		// path given: write the stat of the node referred to by (fd, path)
9663 		if (!IS_USER_ADDRESS(userPath))
9664 			return B_BAD_ADDRESS;
9665 
9666 		KPath pathBuffer;
9667 		if (pathBuffer.InitCheck() != B_OK)
9668 			return B_NO_MEMORY;
9669 
9670 		char* path = pathBuffer.LockBuffer();
9671 
9672 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9673 		if (status != B_OK)
9674 			return status;
9675 
9676 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9677 			statMask, false);
9678 	} else {
9679 		// no path given: get the FD and use the FD operation
9680 		FileDescriptorPutter descriptor(
9681 			get_fd(get_current_io_context(false), fd));
9682 		if (!descriptor.IsSet())
9683 			return B_FILE_ERROR;
9684 
9685 		if (descriptor->ops->fd_write_stat) {
9686 			status = descriptor->ops->fd_write_stat(descriptor.Get(), &stat,
9687 				statMask);
9688 		} else
9689 			status = B_UNSUPPORTED;
9690 	}
9691 
9692 	return status;
9693 }
9694 
9695 
9696 int
9697 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9698 {
9699 	KPath pathBuffer;
9700 	if (pathBuffer.InitCheck() != B_OK)
9701 		return B_NO_MEMORY;
9702 
9703 	char* path = pathBuffer.LockBuffer();
9704 
9705 	if (userPath != NULL) {
9706 		if (!IS_USER_ADDRESS(userPath))
9707 			return B_BAD_ADDRESS;
9708 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9709 		if (status != B_OK)
9710 			return status;
9711 	}
9712 
9713 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9714 }
9715 
9716 
9717 ssize_t
9718 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9719 	size_t readBytes)
9720 {
9721 	char attribute[B_FILE_NAME_LENGTH];
9722 
9723 	if (userAttribute == NULL)
9724 		return B_BAD_VALUE;
9725 	if (!IS_USER_ADDRESS(userAttribute))
9726 		return B_BAD_ADDRESS;
9727 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9728 	if (status != B_OK)
9729 		return status;
9730 
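	// open the attribute as a temporary FD; _user_read() validates userBuffer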
9731 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9732 	if (attr < 0)
9733 		return attr;
9734 
9735 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9736 	_user_close(attr);
9737 
9738 	return bytes;
9739 }
9740 
9741 
9742 ssize_t
9743 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9744 	const void* buffer, size_t writeBytes)
9745 {
9746 	char attribute[B_FILE_NAME_LENGTH];
9747 
9748 	if (userAttribute == NULL)
9749 		return B_BAD_VALUE;
9750 	if (!IS_USER_ADDRESS(userAttribute))
9751 		return B_BAD_ADDRESS;
9752 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9753 	if (status != B_OK)
9754 		return status;
9755 
9756 	// Support the BeOS-typical truncation semantics as well as the position
9757 	// argument: a write at position 0 truncates the attribute first.
9758 	int attr = attr_create(fd, NULL, attribute, type,
9759 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9760 	if (attr < 0)
9761 		return attr;
9762 
9763 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9764 	_user_close(attr);
9765 
9766 	return bytes;
9767 }
9768 
9769 
9770 status_t
9771 _user_stat_attr(int fd, const char* userAttribute,
9772 	struct attr_info* userAttrInfo)
9773 {
9774 	char attribute[B_FILE_NAME_LENGTH];
9775 
9776 	if (userAttribute == NULL || userAttrInfo == NULL)
9777 		return B_BAD_VALUE;
9778 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9779 		return B_BAD_ADDRESS;
9780 	status_t status = user_copy_name(attribute, userAttribute,
9781 		sizeof(attribute));
9782 	if (status != B_OK)
9783 		return status;
9784 
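	// open the attribute and read its type and size via the descriptor's
	// fd_read_stat() hook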
9785 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9786 	if (attr < 0)
9787 		return attr;
9788 
9789 	struct file_descriptor* descriptor
9790 		= get_fd(get_current_io_context(false), attr);
9791 	if (descriptor == NULL) {
9792 		_user_close(attr);
9793 		return B_FILE_ERROR;
9794 	}
9795 
9796 	struct stat stat;
9797 	if (descriptor->ops->fd_read_stat)
9798 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9799 	else
9800 		status = B_UNSUPPORTED;
9801 
9802 	put_fd(descriptor);
9803 	_user_close(attr);
9804 
9805 	if (status == B_OK) {
9806 		attr_info info;
9807 		info.type = stat.st_type;
9808 		info.size = stat.st_size;
9809 
9810 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9811 			return B_BAD_ADDRESS;
9812 	}
9813 
9814 	return status;
9815 }
9816 
9817 
9818 int
9819 _user_open_attr(int fd, const char* userPath, const char* userName,
9820 	uint32 type, int openMode)
9821 {
9822 	char name[B_FILE_NAME_LENGTH];
9823 
9824 	if (!IS_USER_ADDRESS(userName))
9825 		return B_BAD_ADDRESS;
9826 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9827 	if (status != B_OK)
9828 		return status;
9829 
9830 	KPath pathBuffer;
9831 	if (pathBuffer.InitCheck() != B_OK)
9832 		return B_NO_MEMORY;
9833 
9834 	char* path = pathBuffer.LockBuffer();
9835 
9836 	if (userPath != NULL) {
9837 		if (!IS_USER_ADDRESS(userPath))
9838 			return B_BAD_ADDRESS;
9839 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9840 		if (status != B_OK)
9841 			return status;
9842 	}
9843 
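	// the type argument only matters when the attribute is (possibly) created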
9844 	if ((openMode & O_CREAT) != 0) {
9845 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9846 			false);
9847 	}
9848 
9849 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9850 }
9851 
9852 
9853 status_t
9854 _user_remove_attr(int fd, const char* userName)
9855 {
9856 	char name[B_FILE_NAME_LENGTH];
9857 
9858 	if (!IS_USER_ADDRESS(userName))
9859 		return B_BAD_ADDRESS;
9860 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9861 	if (status != B_OK)
9862 		return status;
9863 
9864 	return attr_remove(fd, name, false);
9865 }
9866 
9867 
9868 status_t
9869 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9870 	const char* userToName)
9871 {
9872 	if (!IS_USER_ADDRESS(userFromName)
9873 		|| !IS_USER_ADDRESS(userToName))
9874 		return B_BAD_ADDRESS;
9875 
9876 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9877 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9878 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9879 		return B_NO_MEMORY;
9880 
9881 	char* fromName = fromNameBuffer.LockBuffer();
9882 	char* toName = toNameBuffer.LockBuffer();
9883 
9884 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9885 	if (status != B_OK)
9886 		return status;
9887 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9888 	if (status != B_OK)
9889 		return status;
9890 
9891 	return attr_rename(fromFile, fromName, toFile, toName, false);
9892 }
9893 
9894 
9895 int
9896 _user_open_index_dir(dev_t device)
9897 {
9898 	return index_dir_open(device, false);
9899 }
9900 
9901 
9902 status_t
9903 _user_create_index(dev_t device, const char* userName, uint32 type,
9904 	uint32 flags)
9905 {
9906 	char name[B_FILE_NAME_LENGTH];
9907 
9908 	if (!IS_USER_ADDRESS(userName))
9909 		return B_BAD_ADDRESS;
9910 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9911 	if (status != B_OK)
9912 		return status;
9913 
9914 	return index_create(device, name, type, flags, false);
9915 }
9916 
9917 
9918 status_t
9919 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9920 {
9921 	char name[B_FILE_NAME_LENGTH];
9922 	struct stat stat = {0};
9923 	status_t status;
9924 
9925 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9926 		return B_BAD_ADDRESS;
9927 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9928 	if (status != B_OK)
9929 		return status;
9930 
9931 	status = index_name_read_stat(device, name, &stat, false);
9932 	if (status == B_OK) {
9933 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9934 			return B_BAD_ADDRESS;
9935 	}
9936 
9937 	return status;
9938 }
9939 
9940 
9941 status_t
9942 _user_remove_index(dev_t device, const char* userName)
9943 {
9944 	char name[B_FILE_NAME_LENGTH];
9945 
9946 	if (!IS_USER_ADDRESS(userName))
9947 		return B_BAD_ADDRESS;
9948 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9949 	if (status != B_OK)
9950 		return status;
9951 
9952 	return index_remove(device, name, false);
9953 }
9954 
9955 
9956 status_t
9957 _user_getcwd(char* userBuffer, size_t size)
9958 {
9959 	if (size == 0)
9960 		return B_BAD_VALUE;
9961 	if (!IS_USER_ADDRESS(userBuffer))
9962 		return B_BAD_ADDRESS;
9963 
9964 	if (size > kMaxPathLength)
9965 		size = kMaxPathLength;
9966 
9967 	KPath pathBuffer(size);
9968 	if (pathBuffer.InitCheck() != B_OK)
9969 		return B_NO_MEMORY;
9970 
9971 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9972 
9973 	char* path = pathBuffer.LockBuffer();
9974 
9975 	status_t status = get_cwd(path, size, false);
9976 	if (status != B_OK)
9977 		return status;
9978 
9979 	// Copy back the result
9980 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9981 		return B_BAD_ADDRESS;
9982 
9983 	return status;
9984 }
9985 
9986 
9987 status_t
9988 _user_setcwd(int fd, const char* userPath)
9989 {
9990 	TRACE(("user_setcwd: path = %p\n", userPath));
9991 
9992 	KPath pathBuffer;
9993 	if (pathBuffer.InitCheck() != B_OK)
9994 		return B_NO_MEMORY;
9995 
9996 	char* path = pathBuffer.LockBuffer();
9997 
9998 	if (userPath != NULL) {
9999 		if (!IS_USER_ADDRESS(userPath))
10000 			return B_BAD_ADDRESS;
10001 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10002 		if (status != B_OK)
10003 			return status;
10004 	}
10005 
10006 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10007 }
10008 
10009 
10010 status_t
10011 _user_change_root(const char* userPath)
10012 {
10013 	// only root is allowed to chroot()
10014 	if (geteuid() != 0)
10015 		return B_NOT_ALLOWED;
10016 
10017 	// alloc path buffer
10018 	KPath pathBuffer;
10019 	if (pathBuffer.InitCheck() != B_OK)
10020 		return B_NO_MEMORY;
10021 
10022 	// copy userland path to kernel
10023 	char* path = pathBuffer.LockBuffer();
10024 	if (userPath != NULL) {
10025 		if (!IS_USER_ADDRESS(userPath))
10026 			return B_BAD_ADDRESS;
10027 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10028 		if (status != B_OK)
10029 			return status;
10030 	}
10031 
10032 	// get the vnode
10033 	VnodePutter vnode;
10034 	status_t status = path_to_vnode(path, true, vnode, NULL, false);
10035 	if (status != B_OK)
10036 		return status;
10037 
10038 	// set the new root
10039 	struct io_context* context = get_current_io_context(false);
10040 	mutex_lock(&sIOContextRootLock);
10041 	struct vnode* oldRoot = context->root;
10042 	context->root = vnode.Detach();
10043 	mutex_unlock(&sIOContextRootLock);
10044 
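	// release the reference formerly held by the IO context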
10045 	put_vnode(oldRoot);
10046 
10047 	return B_OK;
10048 }
10049 
10050 
10051 int
10052 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10053 	uint32 flags, port_id port, int32 token)
10054 {
10055 	if (device < 0 || userQuery == NULL || queryLength == 0)
10056 		return B_BAD_VALUE;
10057 
10058 	if (!IS_USER_ADDRESS(userQuery))
10059 		return B_BAD_ADDRESS;
10060 
10061 	// safety restriction: bound the size of the allocation made below
10062 	if (queryLength >= 65536)
10063 		return B_NAME_TOO_LONG;
10064 
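	// +1 for the terminating NUL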
10065 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10066 	if (!query.IsValid())
10067 		return B_NO_MEMORY;
10068 
10069 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10070 		return B_BAD_ADDRESS;
10071 
10072 	return query_open(device, query, flags, port, token, false);
10073 }
10074 
10075 
10076 #include "vfs_request_io.cpp"
10077