xref: /haiku/src/system/kernel/fs/vfs.cpp (revision fc75f2df0c666dcc61be83c4facdd3132340c2fb)
1 /*
2  * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2011, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
108 
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd() - this is not depending
112 	// on PATH_MAX
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and root_vnode->covers fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is as soon
126 	as the mount is mounted and it is made sure it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount) (read) access
128 	to those fields is always safe, even without additional locking. Morever
129 	while mounted the mount holds a reference to the root_vnode->covers vnode,
130 	and thus making the access path vnode->mount->root_vnode->covers->mount->...
131 	safe if a reference to vnode is held (note that for the root mount
132 	root_vnode->covers is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed to read/write access the sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	The fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified,
223 
224 	The thread trying to lock the lock must not hold sVnodeLock or
225 	sMountMutex.
226 */
227 static recursive_lock sMountOpLock;
228 
229 /*!	\brief Guards sVnodeTable.
230 
231 	The holder is allowed read/write access to sVnodeTable and to
232 	any unbusy vnode in that table, save to the immutable fields (device, id,
233 	private_node, mount) to which only read-only access is allowed.
234 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
235 	well as the busy, removed, unused flags, and the vnode's type can also be
236 	write accessed when holding a read lock to sVnodeLock *and* having the vnode
237 	locked. Write access to covered_by and covers requires to write lock
238 	sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not hold this lock when calling create_sem(), as this might call
242 	vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
549 		status_t status, generic_size_t bytesTransferred)
550 		:
551 		fVnode(vnode),
552 		fMountID(vnode->mount->id),
553 		fNodeID(vnode->id),
554 		fCookie(cookie),
555 		fPos(pos),
556 		fCount(count),
557 		fFlags(flags),
558 		fBytesRequested(bytesRequested),
559 		fStatus(status),
560 		fBytesTransferred(bytesTransferred)
561 	{
562 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs, sizeof(generic_io_vec) * count,
563 			false);
564 	}
565 
566 	void AddDump(TraceOutput& out, const char* mode)
567 	{
568 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
569 			"pos: %lld, size: %llu, vecs: {", mode, fVnode, fMountID, fNodeID,
570 			fCookie, fPos, (uint64)fBytesRequested);
571 
572 		if (fVecs != NULL) {
573 			for (uint32 i = 0; i < fCount; i++) {
574 				if (i > 0)
575 					out.Print(", ");
576 				out.Print("(%llx, %llu)", (uint64)fVecs[i].base, (uint64)fVecs[i].length);
577 			}
578 		}
579 
580 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %llu",
581 			fFlags, fStatus, (uint64)fBytesTransferred);
582 	}
583 
584 protected:
585 	struct vnode*	fVnode;
586 	dev_t			fMountID;
587 	ino_t			fNodeID;
588 	void*			fCookie;
589 	off_t			fPos;
590 	generic_io_vec*		fVecs;
591 	uint32			fCount;
592 	uint32			fFlags;
593 	generic_size_t			fBytesRequested;
594 	status_t		fStatus;
595 	generic_size_t			fBytesTransferred;
596 };
597 
598 
599 class ReadPages : public PagesIOTraceEntry {
600 public:
601 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
602 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
603 		status_t status, generic_size_t bytesTransferred)
604 		:
605 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
606 			bytesRequested, status, bytesTransferred)
607 	{
608 		Initialized();
609 	}
610 
611 	virtual void AddDump(TraceOutput& out)
612 	{
613 		PagesIOTraceEntry::AddDump(out, "read");
614 	}
615 };
616 
617 
618 class WritePages : public PagesIOTraceEntry {
619 public:
620 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
621 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
622 		status_t status, generic_size_t bytesTransferred)
623 		:
624 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
625 			bytesRequested, status, bytesTransferred)
626 	{
627 		Initialized();
628 	}
629 
630 	virtual void AddDump(TraceOutput& out)
631 	{
632 		PagesIOTraceEntry::AddDump(out, "write");
633 	}
634 };
635 
636 }	// namespace VFSPagesIOTracing
637 
638 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
639 #else
640 #	define TPIO(x) ;
641 #endif	// VFS_PAGES_IO_TRACING
642 
643 
644 static int
645 mount_compare(void* _m, const void* _key)
646 {
647 	struct fs_mount* mount = (fs_mount*)_m;
648 	const dev_t* id = (dev_t*)_key;
649 
650 	if (mount->id == *id)
651 		return 0;
652 
653 	return -1;
654 }
655 
656 
657 static uint32
658 mount_hash(void* _m, const void* _key, uint32 range)
659 {
660 	struct fs_mount* mount = (fs_mount*)_m;
661 	const dev_t* id = (dev_t*)_key;
662 
663 	if (mount)
664 		return mount->id % range;
665 
666 	return (uint32)*id % range;
667 }
668 
669 
670 /*! Finds the mounted device (the fs_mount structure) with the given ID.
671 	Note, you must hold the gMountMutex lock when you call this function.
672 */
673 static struct fs_mount*
674 find_mount(dev_t id)
675 {
676 	ASSERT_LOCKED_MUTEX(&sMountMutex);
677 
678 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
679 }
680 
681 
682 static status_t
683 get_mount(dev_t id, struct fs_mount** _mount)
684 {
685 	struct fs_mount* mount;
686 
687 	ReadLocker nodeLocker(sVnodeLock);
688 	MutexLocker mountLocker(sMountMutex);
689 
690 	mount = find_mount(id);
691 	if (mount == NULL)
692 		return B_BAD_VALUE;
693 
694 	struct vnode* rootNode = mount->root_vnode;
695 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
696 		// might have been called during a mount/unmount operation
697 		return B_BUSY;
698 	}
699 
700 	inc_vnode_ref_count(mount->root_vnode);
701 	*_mount = mount;
702 	return B_OK;
703 }
704 
705 
706 static void
707 put_mount(struct fs_mount* mount)
708 {
709 	if (mount)
710 		put_vnode(mount->root_vnode);
711 }
712 
713 
714 /*!	Tries to open the specified file system module.
715 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
716 	Returns a pointer to file system module interface, or NULL if it
717 	could not open the module.
718 */
719 static file_system_module_info*
720 get_file_system(const char* fsName)
721 {
722 	char name[B_FILE_NAME_LENGTH];
723 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
724 		// construct module name if we didn't get one
725 		// (we currently support only one API)
726 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
727 		fsName = NULL;
728 	}
729 
730 	file_system_module_info* info;
731 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
732 		return NULL;
733 
734 	return info;
735 }
736 
737 
738 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
739 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
740 	The name is allocated for you, and you have to free() it when you're
741 	done with it.
742 	Returns NULL if the required memory is not available.
743 */
744 static char*
745 get_file_system_name(const char* fsName)
746 {
747 	const size_t length = strlen("file_systems/");
748 
749 	if (strncmp(fsName, "file_systems/", length)) {
750 		// the name already seems to be the module's file name
751 		return strdup(fsName);
752 	}
753 
754 	fsName += length;
755 	const char* end = strchr(fsName, '/');
756 	if (end == NULL) {
757 		// this doesn't seem to be a valid name, but well...
758 		return strdup(fsName);
759 	}
760 
761 	// cut off the trailing /v1
762 
763 	char* name = (char*)malloc(end + 1 - fsName);
764 	if (name == NULL)
765 		return NULL;
766 
767 	strlcpy(name, fsName, end + 1 - fsName);
768 	return name;
769 }
770 
771 
772 /*!	Accepts a list of file system names separated by a colon, one for each
773 	layer and returns the file system name for the specified layer.
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available or if there is no
777 	name for the specified layer.
778 */
779 static char*
780 get_file_system_name_for_layer(const char* fsNames, int32 layer)
781 {
782 	while (layer >= 0) {
783 		const char* end = strchr(fsNames, ':');
784 		if (end == NULL) {
785 			if (layer == 0)
786 				return strdup(fsNames);
787 			return NULL;
788 		}
789 
790 		if (layer == 0) {
791 			size_t length = end - fsNames + 1;
792 			char* result = (char*)malloc(length);
793 			strlcpy(result, fsNames, length);
794 			return result;
795 		}
796 
797 		fsNames = end + 1;
798 		layer--;
799 	}
800 
801 	return NULL;
802 }
803 
804 
805 static int
806 vnode_compare(void* _vnode, const void* _key)
807 {
808 	struct vnode* vnode = (struct vnode*)_vnode;
809 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
810 
811 	if (vnode->device == key->device && vnode->id == key->vnode)
812 		return 0;
813 
814 	return -1;
815 }
816 
817 
818 static uint32
819 vnode_hash(void* _vnode, const void* _key, uint32 range)
820 {
821 	struct vnode* vnode = (struct vnode*)_vnode;
822 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
823 
824 #define VHASH(mountid, vnodeid) \
825 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
826 
827 	if (vnode != NULL)
828 		return VHASH(vnode->device, vnode->id) % range;
829 
830 	return VHASH(key->device, key->vnode) % range;
831 
832 #undef VHASH
833 }
834 
835 
836 static void
837 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
838 {
839 	RecursiveLocker _(mount->rlock);
840 	mount->vnodes.Add(vnode);
841 }
842 
843 
844 static void
845 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
846 {
847 	RecursiveLocker _(mount->rlock);
848 	mount->vnodes.Remove(vnode);
849 }
850 
851 
852 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
853 
854 	The caller must hold the sVnodeLock (read lock at least).
855 
856 	\param mountID the mount ID.
857 	\param vnodeID the node ID.
858 
859 	\return The vnode structure, if it was found in the hash table, \c NULL
860 			otherwise.
861 */
862 static struct vnode*
863 lookup_vnode(dev_t mountID, ino_t vnodeID)
864 {
865 	struct vnode_hash_key key;
866 
867 	key.device = mountID;
868 	key.vnode = vnodeID;
869 
870 	return (vnode*)hash_lookup(sVnodeTable, &key);
871 }
872 
873 
874 /*!	Creates a new vnode with the given mount and node ID.
875 	If the node already exists, it is returned instead and no new node is
876 	created. In either case -- but not, if an error occurs -- the function write
877 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
878 	error the lock is not not held on return.
879 
880 	\param mountID The mount ID.
881 	\param vnodeID The vnode ID.
882 	\param _vnode Will be set to the new vnode on success.
883 	\param _nodeCreated Will be set to \c true when the returned vnode has
884 		been newly created, \c false when it already existed. Will not be
885 		changed on error.
886 	\return \c B_OK, when the vnode was successfully created and inserted or
887 		a node with the given ID was found, \c B_NO_MEMORY or
888 		\c B_ENTRY_NOT_FOUND on error.
889 */
890 static status_t
891 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
892 	bool& _nodeCreated)
893 {
894 	FUNCTION(("create_new_vnode_and_lock()\n"));
895 
896 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
897 	if (vnode == NULL)
898 		return B_NO_MEMORY;
899 
900 	// initialize basic values
901 	memset(vnode, 0, sizeof(struct vnode));
902 	vnode->device = mountID;
903 	vnode->id = vnodeID;
904 	vnode->ref_count = 1;
905 	vnode->SetBusy(true);
906 
907 	// look up the the node -- it might have been added by someone else in the
908 	// meantime
909 	rw_lock_write_lock(&sVnodeLock);
910 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
911 	if (existingVnode != NULL) {
912 		free(vnode);
913 		_vnode = existingVnode;
914 		_nodeCreated = false;
915 		return B_OK;
916 	}
917 
918 	// get the mount structure
919 	mutex_lock(&sMountMutex);
920 	vnode->mount = find_mount(mountID);
921 	if (!vnode->mount || vnode->mount->unmounting) {
922 		mutex_unlock(&sMountMutex);
923 		rw_lock_write_unlock(&sVnodeLock);
924 		free(vnode);
925 		return B_ENTRY_NOT_FOUND;
926 	}
927 
928 	// add the vnode to the mount's node list and the hash table
929 	hash_insert(sVnodeTable, vnode);
930 	add_vnode_to_mount_list(vnode, vnode->mount);
931 
932 	mutex_unlock(&sMountMutex);
933 
934 	_vnode = vnode;
935 	_nodeCreated = true;
936 
937 	// keep the vnode lock locked
938 	return B_OK;
939 }
940 
941 
942 /*!	Frees the vnode and all resources it has acquired, and removes
943 	it from the vnode hash as well as from its mount structure.
944 	Will also make sure that any cache modifications are written back.
945 */
946 static void
947 free_vnode(struct vnode* vnode, bool reenter)
948 {
949 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
950 		vnode);
951 
952 	// write back any changes in this vnode's cache -- but only
953 	// if the vnode won't be deleted, in which case the changes
954 	// will be discarded
955 
956 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
957 		FS_CALL_NO_PARAMS(vnode, fsync);
958 
959 	// Note: If this vnode has a cache attached, there will still be two
960 	// references to that cache at this point. The last one belongs to the vnode
961 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
962 	// cache. Each but the last reference to a cache also includes a reference
963 	// to the vnode. The file cache, however, released its reference (cf.
964 	// file_cache_create()), so that this vnode's ref count has the chance to
965 	// ever drop to 0. Deleting the file cache now, will cause the next to last
966 	// cache reference to be released, which will also release a (no longer
967 	// existing) vnode reference. To avoid problems, we set the vnode's ref
968 	// count, so that it will neither become negative nor 0.
969 	vnode->ref_count = 2;
970 
971 	if (!vnode->IsUnpublished()) {
972 		if (vnode->IsRemoved())
973 			FS_CALL(vnode, remove_vnode, reenter);
974 		else
975 			FS_CALL(vnode, put_vnode, reenter);
976 	}
977 
978 	// If the vnode has a VMCache attached, make sure that it won't try to get
979 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
980 	// long as the vnode is busy and in the hash, that won't happen, but as
981 	// soon as we've removed it from the hash, it could reload the vnode -- with
982 	// a new cache attached!
983 	if (vnode->cache != NULL)
984 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
985 
986 	// The file system has removed the resources of the vnode now, so we can
987 	// make it available again (by removing the busy vnode from the hash).
988 	rw_lock_write_lock(&sVnodeLock);
989 	hash_remove(sVnodeTable, vnode);
990 	rw_lock_write_unlock(&sVnodeLock);
991 
992 	// if we have a VMCache attached, remove it
993 	if (vnode->cache)
994 		vnode->cache->ReleaseRef();
995 
996 	vnode->cache = NULL;
997 
998 	remove_vnode_from_mount_list(vnode, vnode->mount);
999 
1000 	free(vnode);
1001 }
1002 
1003 
1004 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1005 	if the counter dropped to 0.
1006 
1007 	The caller must, of course, own a reference to the vnode to call this
1008 	function.
1009 	The caller must not hold the sVnodeLock or the sMountMutex.
1010 
1011 	\param vnode the vnode.
1012 	\param alwaysFree don't move this vnode into the unused list, but really
1013 		   delete it if possible.
1014 	\param reenter \c true, if this function is called (indirectly) from within
1015 		   a file system. This will be passed to file system hooks only.
1016 	\return \c B_OK, if everything went fine, an error code otherwise.
1017 */
1018 static status_t
1019 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1020 {
1021 	ReadLocker locker(sVnodeLock);
1022 	AutoLocker<Vnode> nodeLocker(vnode);
1023 
1024 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1025 
1026 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1027 
1028 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1029 		vnode->ref_count));
1030 
1031 	if (oldRefCount != 1)
1032 		return B_OK;
1033 
1034 	if (vnode->IsBusy())
1035 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1036 
1037 	bool freeNode = false;
1038 	bool freeUnusedNodes = false;
1039 
1040 	// Just insert the vnode into an unused list if we don't need
1041 	// to delete it
1042 	if (vnode->IsRemoved() || alwaysFree) {
1043 		vnode_to_be_freed(vnode);
1044 		vnode->SetBusy(true);
1045 		freeNode = true;
1046 	} else
1047 		freeUnusedNodes = vnode_unused(vnode);
1048 
1049 	nodeLocker.Unlock();
1050 	locker.Unlock();
1051 
1052 	if (freeNode)
1053 		free_vnode(vnode, reenter);
1054 	else if (freeUnusedNodes)
1055 		free_unused_vnodes();
1056 
1057 	return B_OK;
1058 }
1059 
1060 
1061 /*!	\brief Increments the reference counter of the given vnode.
1062 
1063 	The caller must make sure that the node isn't deleted while this function
1064 	is called. This can be done either:
1065 	- by ensuring that a reference to the node exists and remains in existence,
1066 	  or
1067 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1068 	  or by holding sVnodeLock write locked.
1069 
1070 	In the second case the caller is responsible for dealing with the ref count
1071 	0 -> 1 transition. That is 1. this function must not be invoked when the
1072 	node is busy in the first place and 2. vnode_used() must be called for the
1073 	node.
1074 
1075 	\param vnode the vnode.
1076 */
1077 static void
1078 inc_vnode_ref_count(struct vnode* vnode)
1079 {
1080 	atomic_add(&vnode->ref_count, 1);
1081 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1082 		vnode->ref_count));
1083 }
1084 
1085 
1086 static bool
1087 is_special_node_type(int type)
1088 {
1089 	// at the moment only FIFOs are supported
1090 	return S_ISFIFO(type);
1091 }
1092 
1093 
1094 static status_t
1095 create_special_sub_node(struct vnode* vnode, uint32 flags)
1096 {
1097 	if (S_ISFIFO(vnode->Type()))
1098 		return create_fifo_vnode(vnode->mount->volume, vnode);
1099 
1100 	return B_BAD_VALUE;
1101 }
1102 
1103 
1104 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1105 
1106 	If the node is not yet in memory, it will be loaded.
1107 
1108 	The caller must not hold the sVnodeLock or the sMountMutex.
1109 
1110 	\param mountID the mount ID.
1111 	\param vnodeID the node ID.
1112 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1113 		   retrieved vnode structure shall be written.
1114 	\param reenter \c true, if this function is called (indirectly) from within
1115 		   a file system.
1116 	\return \c B_OK, if everything when fine, an error code otherwise.
1117 */
1118 static status_t
1119 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1120 	int reenter)
1121 {
1122 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1123 		_vnode));
1124 
1125 	rw_lock_read_lock(&sVnodeLock);
1126 
1127 	int32 tries = 2000;
1128 		// try for 10 secs
1129 restart:
1130 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1131 	AutoLocker<Vnode> nodeLocker(vnode);
1132 
1133 	if (vnode && vnode->IsBusy()) {
1134 		nodeLocker.Unlock();
1135 		rw_lock_read_unlock(&sVnodeLock);
1136 		if (!canWait || --tries < 0) {
1137 			// vnode doesn't seem to become unbusy
1138 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1139 				vnodeID);
1140 			return B_BUSY;
1141 		}
1142 		snooze(5000); // 5 ms
1143 		rw_lock_read_lock(&sVnodeLock);
1144 		goto restart;
1145 	}
1146 
1147 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1148 
1149 	status_t status;
1150 
1151 	if (vnode) {
1152 		if (vnode->ref_count == 0) {
1153 			// this vnode has been unused before
1154 			vnode_used(vnode);
1155 		}
1156 		inc_vnode_ref_count(vnode);
1157 
1158 		nodeLocker.Unlock();
1159 		rw_lock_read_unlock(&sVnodeLock);
1160 	} else {
1161 		// we need to create a new vnode and read it in
1162 		rw_lock_read_unlock(&sVnodeLock);
1163 			// unlock -- create_new_vnode_and_lock() write-locks on success
1164 		bool nodeCreated;
1165 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1166 			nodeCreated);
1167 		if (status != B_OK)
1168 			return status;
1169 
1170 		if (!nodeCreated) {
1171 			rw_lock_read_lock(&sVnodeLock);
1172 			rw_lock_write_unlock(&sVnodeLock);
1173 			goto restart;
1174 		}
1175 
1176 		rw_lock_write_unlock(&sVnodeLock);
1177 
1178 		int type;
1179 		uint32 flags;
1180 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1181 			&flags, reenter);
1182 		if (status == B_OK && vnode->private_node == NULL)
1183 			status = B_BAD_VALUE;
1184 
1185 		bool gotNode = status == B_OK;
1186 		bool publishSpecialSubNode = false;
1187 		if (gotNode) {
1188 			vnode->SetType(type);
1189 			publishSpecialSubNode = is_special_node_type(type)
1190 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1191 		}
1192 
1193 		if (gotNode && publishSpecialSubNode)
1194 			status = create_special_sub_node(vnode, flags);
1195 
1196 		if (status != B_OK) {
1197 			if (gotNode)
1198 				FS_CALL(vnode, put_vnode, reenter);
1199 
1200 			rw_lock_write_lock(&sVnodeLock);
1201 			hash_remove(sVnodeTable, vnode);
1202 			remove_vnode_from_mount_list(vnode, vnode->mount);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 
1205 			free(vnode);
1206 			return status;
1207 		}
1208 
1209 		rw_lock_read_lock(&sVnodeLock);
1210 		vnode->Lock();
1211 
1212 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1213 		vnode->SetBusy(false);
1214 
1215 		vnode->Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	}
1218 
1219 	TRACE(("get_vnode: returning %p\n", vnode));
1220 
1221 	*_vnode = vnode;
1222 	return B_OK;
1223 }
1224 
1225 
1226 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1227 	if the counter dropped to 0.
1228 
1229 	The caller must, of course, own a reference to the vnode to call this
1230 	function.
1231 	The caller must not hold the sVnodeLock or the sMountMutex.
1232 
1233 	\param vnode the vnode.
1234 */
1235 static inline void
1236 put_vnode(struct vnode* vnode)
1237 {
1238 	dec_vnode_ref_count(vnode, false, false);
1239 }
1240 
1241 
1242 static void
1243 free_unused_vnodes(int32 level)
1244 {
1245 	unused_vnodes_check_started();
1246 
1247 	if (level == B_NO_LOW_RESOURCE) {
1248 		unused_vnodes_check_done();
1249 		return;
1250 	}
1251 
1252 	flush_hot_vnodes();
1253 
1254 	// determine how many nodes to free
1255 	uint32 count = 1;
1256 	{
1257 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1258 
1259 		switch (level) {
1260 			case B_LOW_RESOURCE_NOTE:
1261 				count = sUnusedVnodes / 100;
1262 				break;
1263 			case B_LOW_RESOURCE_WARNING:
1264 				count = sUnusedVnodes / 10;
1265 				break;
1266 			case B_LOW_RESOURCE_CRITICAL:
1267 				count = sUnusedVnodes;
1268 				break;
1269 		}
1270 
1271 		if (count > sUnusedVnodes)
1272 			count = sUnusedVnodes;
1273 	}
1274 
1275 	// Write back the modified pages of some unused vnodes and free them.
1276 
1277 	for (uint32 i = 0; i < count; i++) {
1278 		ReadLocker vnodesReadLocker(sVnodeLock);
1279 
1280 		// get the first node
1281 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1282 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1283 			&sUnusedVnodeList);
1284 		unusedVnodesLocker.Unlock();
1285 
1286 		if (vnode == NULL)
1287 			break;
1288 
1289 		// lock the node
1290 		AutoLocker<Vnode> nodeLocker(vnode);
1291 
1292 		// Check whether the node is still unused -- since we only append to the
1293 		// the tail of the unused queue, the vnode should still be at its head.
1294 		// Alternatively we could check its ref count for 0 and its busy flag,
1295 		// but if the node is no longer at the head of the queue, it means it
1296 		// has been touched in the meantime, i.e. it is no longer the least
1297 		// recently used unused vnode and we rather don't free it.
1298 		unusedVnodesLocker.Lock();
1299 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1300 			continue;
1301 		unusedVnodesLocker.Unlock();
1302 
1303 		ASSERT(!vnode->IsBusy());
1304 
1305 		// grab a reference
1306 		inc_vnode_ref_count(vnode);
1307 		vnode_used(vnode);
1308 
1309 		// write back changes and free the node
1310 		nodeLocker.Unlock();
1311 		vnodesReadLocker.Unlock();
1312 
1313 		if (vnode->cache != NULL)
1314 			vnode->cache->WriteModified();
1315 
1316 		dec_vnode_ref_count(vnode, true, false);
1317 			// this should free the vnode when it's still unused
1318 	}
1319 
1320 	unused_vnodes_check_done();
1321 }
1322 
1323 
1324 /*!	Gets the vnode the given vnode is covering.
1325 
1326 	The caller must have \c sVnodeLock read-locked at least.
1327 
1328 	The function returns a reference to the retrieved vnode (if any), the caller
1329 	is responsible to free.
1330 
1331 	\param vnode The vnode whose covered node shall be returned.
1332 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1333 		vnode.
1334 */
1335 static inline Vnode*
1336 get_covered_vnode_locked(Vnode* vnode)
1337 {
1338 	if (Vnode* coveredNode = vnode->covers) {
1339 		while (coveredNode->covers != NULL)
1340 			coveredNode = coveredNode->covers;
1341 
1342 		inc_vnode_ref_count(coveredNode);
1343 		return coveredNode;
1344 	}
1345 
1346 	return NULL;
1347 }
1348 
1349 
1350 /*!	Gets the vnode the given vnode is covering.
1351 
1352 	The caller must not hold \c sVnodeLock. Note that this implies a race
1353 	condition, since the situation can change at any time.
1354 
1355 	The function returns a reference to the retrieved vnode (if any), the caller
1356 	is responsible to free.
1357 
1358 	\param vnode The vnode whose covered node shall be returned.
1359 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1360 		vnode.
1361 */
1362 static inline Vnode*
1363 get_covered_vnode(Vnode* vnode)
1364 {
1365 	if (!vnode->IsCovering())
1366 		return NULL;
1367 
1368 	ReadLocker vnodeReadLocker(sVnodeLock);
1369 	return get_covered_vnode_locked(vnode);
1370 }
1371 
1372 
1373 /*!	Gets the vnode the given vnode is covered by.
1374 
1375 	The caller must have \c sVnodeLock read-locked at least.
1376 
1377 	The function returns a reference to the retrieved vnode (if any), the caller
1378 	is responsible to free.
1379 
1380 	\param vnode The vnode whose covering node shall be returned.
1381 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1382 		any vnode.
1383 */
1384 static Vnode*
1385 get_covering_vnode_locked(Vnode* vnode)
1386 {
1387 	if (Vnode* coveringNode = vnode->covered_by) {
1388 		while (coveringNode->covered_by != NULL)
1389 			coveringNode = coveringNode->covered_by;
1390 
1391 		inc_vnode_ref_count(coveringNode);
1392 		return coveringNode;
1393 	}
1394 
1395 	return NULL;
1396 }
1397 
1398 
1399 /*!	Gets the vnode the given vnode is covered by.
1400 
1401 	The caller must not hold \c sVnodeLock. Note that this implies a race
1402 	condition, since the situation can change at any time.
1403 
1404 	The function returns a reference to the retrieved vnode (if any), the caller
1405 	is responsible to free.
1406 
1407 	\param vnode The vnode whose covering node shall be returned.
1408 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1409 		any vnode.
1410 */
1411 static inline Vnode*
1412 get_covering_vnode(Vnode* vnode)
1413 {
1414 	if (!vnode->IsCovered())
1415 		return NULL;
1416 
1417 	ReadLocker vnodeReadLocker(sVnodeLock);
1418 	return get_covering_vnode_locked(vnode);
1419 }
1420 
1421 
1422 static void
1423 free_unused_vnodes()
1424 {
1425 	free_unused_vnodes(
1426 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1427 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1428 }
1429 
1430 
1431 static void
1432 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1433 {
1434 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1435 
1436 	free_unused_vnodes(level);
1437 }
1438 
1439 
1440 static inline void
1441 put_advisory_locking(struct advisory_locking* locking)
1442 {
1443 	release_sem(locking->lock);
1444 }
1445 
1446 
1447 /*!	Returns the advisory_locking object of the \a vnode in case it
1448 	has one, and locks it.
1449 	You have to call put_advisory_locking() when you're done with
1450 	it.
1451 	Note, you must not have the vnode mutex locked when calling
1452 	this function.
1453 */
1454 static struct advisory_locking*
1455 get_advisory_locking(struct vnode* vnode)
1456 {
1457 	rw_lock_read_lock(&sVnodeLock);
1458 	vnode->Lock();
1459 
1460 	struct advisory_locking* locking = vnode->advisory_locking;
1461 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1462 
1463 	vnode->Unlock();
1464 	rw_lock_read_unlock(&sVnodeLock);
1465 
1466 	if (lock >= 0)
1467 		lock = acquire_sem(lock);
1468 	if (lock < 0) {
1469 		// This means the locking has been deleted in the mean time
1470 		// or had never existed in the first place - otherwise, we
1471 		// would get the lock at some point.
1472 		return NULL;
1473 	}
1474 
1475 	return locking;
1476 }
1477 
1478 
1479 /*!	Creates a locked advisory_locking object, and attaches it to the
1480 	given \a vnode.
1481 	Returns B_OK in case of success - also if the vnode got such an
1482 	object from someone else in the mean time, you'll still get this
1483 	one locked then.
1484 */
1485 static status_t
1486 create_advisory_locking(struct vnode* vnode)
1487 {
1488 	if (vnode == NULL)
1489 		return B_FILE_ERROR;
1490 
1491 	ObjectDeleter<advisory_locking> lockingDeleter;
1492 	struct advisory_locking* locking = NULL;
1493 
1494 	while (get_advisory_locking(vnode) == NULL) {
1495 		// no locking object set on the vnode yet, create one
1496 		if (locking == NULL) {
1497 			locking = new(std::nothrow) advisory_locking;
1498 			if (locking == NULL)
1499 				return B_NO_MEMORY;
1500 			lockingDeleter.SetTo(locking);
1501 
1502 			locking->wait_sem = create_sem(0, "advisory lock");
1503 			if (locking->wait_sem < 0)
1504 				return locking->wait_sem;
1505 
1506 			locking->lock = create_sem(0, "advisory locking");
1507 			if (locking->lock < 0)
1508 				return locking->lock;
1509 		}
1510 
1511 		// set our newly created locking object
1512 		ReadLocker _(sVnodeLock);
1513 		AutoLocker<Vnode> nodeLocker(vnode);
1514 		if (vnode->advisory_locking == NULL) {
1515 			vnode->advisory_locking = locking;
1516 			lockingDeleter.Detach();
1517 			return B_OK;
1518 		}
1519 	}
1520 
1521 	// The vnode already had a locking object. That's just as well.
1522 
1523 	return B_OK;
1524 }
1525 
1526 
1527 /*!	Retrieves the first lock that has been set by the current team.
1528 */
1529 static status_t
1530 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1531 {
1532 	struct advisory_locking* locking = get_advisory_locking(vnode);
1533 	if (locking == NULL)
1534 		return B_BAD_VALUE;
1535 
1536 	// TODO: this should probably get the flock by its file descriptor!
1537 	team_id team = team_get_current_team_id();
1538 	status_t status = B_BAD_VALUE;
1539 
1540 	LockList::Iterator iterator = locking->locks.GetIterator();
1541 	while (iterator.HasNext()) {
1542 		struct advisory_lock* lock = iterator.Next();
1543 
1544 		if (lock->team == team) {
1545 			flock->l_start = lock->start;
1546 			flock->l_len = lock->end - lock->start + 1;
1547 			status = B_OK;
1548 			break;
1549 		}
1550 	}
1551 
1552 	put_advisory_locking(locking);
1553 	return status;
1554 }
1555 
1556 
1557 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1558 	with the advisory_lock \a lock.
1559 */
1560 static bool
1561 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1562 {
1563 	if (flock == NULL)
1564 		return true;
1565 
1566 	return lock->start <= flock->l_start - 1 + flock->l_len
1567 		&& lock->end >= flock->l_start;
1568 }
1569 
1570 
1571 /*!	Removes the specified lock, or all locks of the calling team
1572 	if \a flock is NULL.
1573 */
1574 static status_t
1575 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1576 {
1577 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1578 
1579 	struct advisory_locking* locking = get_advisory_locking(vnode);
1580 	if (locking == NULL)
1581 		return B_OK;
1582 
1583 	// TODO: use the thread ID instead??
1584 	team_id team = team_get_current_team_id();
1585 	pid_t session = thread_get_current_thread()->team->session_id;
1586 
1587 	// find matching lock entries
1588 
1589 	LockList::Iterator iterator = locking->locks.GetIterator();
1590 	while (iterator.HasNext()) {
1591 		struct advisory_lock* lock = iterator.Next();
1592 		bool removeLock = false;
1593 
1594 		if (lock->session == session)
1595 			removeLock = true;
1596 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1597 			bool endsBeyond = false;
1598 			bool startsBefore = false;
1599 			if (flock != NULL) {
1600 				startsBefore = lock->start < flock->l_start;
1601 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1602 			}
1603 
1604 			if (!startsBefore && !endsBeyond) {
1605 				// lock is completely contained in flock
1606 				removeLock = true;
1607 			} else if (startsBefore && !endsBeyond) {
1608 				// cut the end of the lock
1609 				lock->end = flock->l_start - 1;
1610 			} else if (!startsBefore && endsBeyond) {
1611 				// cut the start of the lock
1612 				lock->start = flock->l_start + flock->l_len;
1613 			} else {
1614 				// divide the lock into two locks
1615 				struct advisory_lock* secondLock = new advisory_lock;
1616 				if (secondLock == NULL) {
1617 					// TODO: we should probably revert the locks we already
1618 					// changed... (ie. allocate upfront)
1619 					put_advisory_locking(locking);
1620 					return B_NO_MEMORY;
1621 				}
1622 
1623 				lock->end = flock->l_start - 1;
1624 
1625 				secondLock->team = lock->team;
1626 				secondLock->session = lock->session;
1627 				// values must already be normalized when getting here
1628 				secondLock->start = flock->l_start + flock->l_len;
1629 				secondLock->end = lock->end;
1630 				secondLock->shared = lock->shared;
1631 
1632 				locking->locks.Add(secondLock);
1633 			}
1634 		}
1635 
1636 		if (removeLock) {
1637 			// this lock is no longer used
1638 			iterator.Remove();
1639 			free(lock);
1640 		}
1641 	}
1642 
1643 	bool removeLocking = locking->locks.IsEmpty();
1644 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1645 
1646 	put_advisory_locking(locking);
1647 
1648 	if (removeLocking) {
1649 		// We can remove the whole advisory locking structure; it's no
1650 		// longer used
1651 		locking = get_advisory_locking(vnode);
1652 		if (locking != NULL) {
1653 			ReadLocker locker(sVnodeLock);
1654 			AutoLocker<Vnode> nodeLocker(vnode);
1655 
1656 			// the locking could have been changed in the mean time
1657 			if (locking->locks.IsEmpty()) {
1658 				vnode->advisory_locking = NULL;
1659 				nodeLocker.Unlock();
1660 				locker.Unlock();
1661 
1662 				// we've detached the locking from the vnode, so we can
1663 				// safely delete it
1664 				delete locking;
1665 			} else {
1666 				// the locking is in use again
1667 				nodeLocker.Unlock();
1668 				locker.Unlock();
1669 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1670 			}
1671 		}
1672 	}
1673 
1674 	return B_OK;
1675 }
1676 
1677 
1678 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1679 	will wait for the lock to become available, if there are any collisions
1680 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1681 
1682 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1683 	BSD flock() semantics are used, that is, all children can unlock the file
1684 	in question (we even allow parents to remove the lock, though, but that
1685 	seems to be in line to what the BSD's are doing).
1686 */
1687 static status_t
1688 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1689 	bool wait)
1690 {
1691 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1692 		vnode, flock, wait ? "yes" : "no"));
1693 
1694 	bool shared = flock->l_type == F_RDLCK;
1695 	status_t status = B_OK;
1696 
1697 	// TODO: do deadlock detection!
1698 
1699 	struct advisory_locking* locking;
1700 
1701 	while (true) {
1702 		// if this vnode has an advisory_locking structure attached,
1703 		// lock that one and search for any colliding file lock
1704 		status = create_advisory_locking(vnode);
1705 		if (status != B_OK)
1706 			return status;
1707 
1708 		locking = vnode->advisory_locking;
1709 		team_id team = team_get_current_team_id();
1710 		sem_id waitForLock = -1;
1711 
1712 		// test for collisions
1713 		LockList::Iterator iterator = locking->locks.GetIterator();
1714 		while (iterator.HasNext()) {
1715 			struct advisory_lock* lock = iterator.Next();
1716 
1717 			// TODO: locks from the same team might be joinable!
1718 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1719 				// locks do overlap
1720 				if (!shared || !lock->shared) {
1721 					// we need to wait
1722 					waitForLock = locking->wait_sem;
1723 					break;
1724 				}
1725 			}
1726 		}
1727 
1728 		if (waitForLock < 0)
1729 			break;
1730 
1731 		// We need to wait. Do that or fail now, if we've been asked not to.
1732 
1733 		if (!wait) {
1734 			put_advisory_locking(locking);
1735 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1736 		}
1737 
1738 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1739 			B_CAN_INTERRUPT, 0);
1740 		if (status != B_OK && status != B_BAD_SEM_ID)
1741 			return status;
1742 
1743 		// We have been notified, but we need to re-lock the locking object. So
1744 		// go another round...
1745 	}
1746 
1747 	// install new lock
1748 
1749 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1750 		sizeof(struct advisory_lock));
1751 	if (lock == NULL) {
1752 		put_advisory_locking(locking);
1753 		return B_NO_MEMORY;
1754 	}
1755 
1756 	lock->team = team_get_current_team_id();
1757 	lock->session = session;
1758 	// values must already be normalized when getting here
1759 	lock->start = flock->l_start;
1760 	lock->end = flock->l_start - 1 + flock->l_len;
1761 	lock->shared = shared;
1762 
1763 	locking->locks.Add(lock);
1764 	put_advisory_locking(locking);
1765 
1766 	return status;
1767 }
1768 
1769 
1770 /*!	Normalizes the \a flock structure to make it easier to compare the
1771 	structure with others. The l_start and l_len fields are set to absolute
1772 	values according to the l_whence field.
1773 */
1774 static status_t
1775 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1776 {
1777 	switch (flock->l_whence) {
1778 		case SEEK_SET:
1779 			break;
1780 		case SEEK_CUR:
1781 			flock->l_start += descriptor->pos;
1782 			break;
1783 		case SEEK_END:
1784 		{
1785 			struct vnode* vnode = descriptor->u.vnode;
1786 			struct stat stat;
1787 			status_t status;
1788 
1789 			if (!HAS_FS_CALL(vnode, read_stat))
1790 				return B_UNSUPPORTED;
1791 
1792 			status = FS_CALL(vnode, read_stat, &stat);
1793 			if (status != B_OK)
1794 				return status;
1795 
1796 			flock->l_start += stat.st_size;
1797 			break;
1798 		}
1799 		default:
1800 			return B_BAD_VALUE;
1801 	}
1802 
1803 	if (flock->l_start < 0)
1804 		flock->l_start = 0;
1805 	if (flock->l_len == 0)
1806 		flock->l_len = OFF_MAX;
1807 
1808 	// don't let the offset and length overflow
1809 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1810 		flock->l_len = OFF_MAX - flock->l_start;
1811 
1812 	if (flock->l_len < 0) {
1813 		// a negative length reverses the region
1814 		flock->l_start += flock->l_len;
1815 		flock->l_len = -flock->l_len;
1816 	}
1817 
1818 	return B_OK;
1819 }
1820 
1821 
1822 static void
1823 replace_vnode_if_disconnected(struct fs_mount* mount,
1824 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1825 	struct vnode* fallBack, bool lockRootLock)
1826 {
1827 	struct vnode* givenVnode = vnode;
1828 	bool vnodeReplaced = false;
1829 
1830 	ReadLocker vnodeReadLocker(sVnodeLock);
1831 
1832 	if (lockRootLock)
1833 		mutex_lock(&sIOContextRootLock);
1834 
1835 	while (vnode != NULL && vnode->mount == mount
1836 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1837 		if (vnode->covers != NULL) {
1838 			// redirect the vnode to the covered vnode
1839 			vnode = vnode->covers;
1840 		} else
1841 			vnode = fallBack;
1842 
1843 		vnodeReplaced = true;
1844 	}
1845 
1846 	// If we've replaced the node, grab a reference for the new one.
1847 	if (vnodeReplaced && vnode != NULL)
1848 		inc_vnode_ref_count(vnode);
1849 
1850 	if (lockRootLock)
1851 		mutex_unlock(&sIOContextRootLock);
1852 
1853 	vnodeReadLocker.Unlock();
1854 
1855 	if (vnodeReplaced)
1856 		put_vnode(givenVnode);
1857 }
1858 
1859 
1860 /*!	Disconnects all file descriptors that are associated with the
1861 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1862 	\a mount object.
1863 
1864 	Note, after you've called this function, there might still be ongoing
1865 	accesses - they won't be interrupted if they already happened before.
1866 	However, any subsequent access will fail.
1867 
1868 	This is not a cheap function and should be used with care and rarely.
1869 	TODO: there is currently no means to stop a blocking read/write!
1870 */
1871 static void
1872 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1873 	struct vnode* vnodeToDisconnect)
1874 {
1875 	// iterate over all teams and peek into their file descriptors
1876 	TeamListIterator teamIterator;
1877 	while (Team* team = teamIterator.Next()) {
1878 		BReference<Team> teamReference(team, true);
1879 
1880 		// lock the I/O context
1881 		io_context* context = team->io_context;
1882 		MutexLocker contextLocker(context->io_mutex);
1883 
1884 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1885 			sRoot, true);
1886 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1887 			sRoot, false);
1888 
1889 		for (uint32 i = 0; i < context->table_size; i++) {
1890 			if (struct file_descriptor* descriptor = context->fds[i]) {
1891 				inc_fd_ref_count(descriptor);
1892 
1893 				// if this descriptor points at this mount, we
1894 				// need to disconnect it to be able to unmount
1895 				struct vnode* vnode = fd_vnode(descriptor);
1896 				if (vnodeToDisconnect != NULL) {
1897 					if (vnode == vnodeToDisconnect)
1898 						disconnect_fd(descriptor);
1899 				} else if ((vnode != NULL && vnode->mount == mount)
1900 					|| (vnode == NULL && descriptor->u.mount == mount))
1901 					disconnect_fd(descriptor);
1902 
1903 				put_fd(descriptor);
1904 			}
1905 		}
1906 	}
1907 }
1908 
1909 
1910 /*!	\brief Gets the root node of the current IO context.
1911 	If \a kernel is \c true, the kernel IO context will be used.
1912 	The caller obtains a reference to the returned node.
1913 */
1914 struct vnode*
1915 get_root_vnode(bool kernel)
1916 {
1917 	if (!kernel) {
1918 		// Get current working directory from io context
1919 		struct io_context* context = get_current_io_context(kernel);
1920 
1921 		mutex_lock(&sIOContextRootLock);
1922 
1923 		struct vnode* root = context->root;
1924 		if (root != NULL)
1925 			inc_vnode_ref_count(root);
1926 
1927 		mutex_unlock(&sIOContextRootLock);
1928 
1929 		if (root != NULL)
1930 			return root;
1931 
1932 		// That should never happen.
1933 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1934 			"root\n", team_get_current_team_id());
1935 	}
1936 
1937 	inc_vnode_ref_count(sRoot);
1938 	return sRoot;
1939 }
1940 
1941 
1942 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1943 
1944 	Given an arbitrary vnode (identified by mount and node ID), the function
1945 	checks, whether the vnode is covered by another vnode. If it is, the
1946 	function returns the mount and node ID of the covering vnode. Otherwise
1947 	it simply returns the supplied mount and node ID.
1948 
1949 	In case of error (e.g. the supplied node could not be found) the variables
1950 	for storing the resolved mount and node ID remain untouched and an error
1951 	code is returned.
1952 
1953 	\param mountID The mount ID of the vnode in question.
1954 	\param nodeID The node ID of the vnode in question.
1955 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1956 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1957 	\return
1958 	- \c B_OK, if everything went fine,
1959 	- another error code, if something went wrong.
1960 */
1961 status_t
1962 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1963 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1964 {
1965 	// get the node
1966 	struct vnode* node;
1967 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1968 	if (error != B_OK)
1969 		return error;
1970 
1971 	// resolve the node
1972 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1973 		put_vnode(node);
1974 		node = coveringNode;
1975 	}
1976 
1977 	// set the return values
1978 	*resolvedMountID = node->device;
1979 	*resolvedNodeID = node->id;
1980 
1981 	put_vnode(node);
1982 
1983 	return B_OK;
1984 }
1985 
1986 
1987 /*!	\brief Gets the directory path and leaf name for a given path.
1988 
1989 	The supplied \a path is transformed to refer to the directory part of
1990 	the entry identified by the original path, and into the buffer \a filename
1991 	the leaf name of the original entry is written.
1992 	Neither the returned path nor the leaf name can be expected to be
1993 	canonical.
1994 
1995 	\param path The path to be analyzed. Must be able to store at least one
1996 		   additional character.
1997 	\param filename The buffer into which the leaf name will be written.
1998 		   Must be of size B_FILE_NAME_LENGTH at least.
1999 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2000 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2001 		   if the given path name is empty.
2002 */
2003 static status_t
2004 get_dir_path_and_leaf(char* path, char* filename)
2005 {
2006 	if (*path == '\0')
2007 		return B_ENTRY_NOT_FOUND;
2008 
2009 	char* last = strrchr(path, '/');
2010 		// '/' are not allowed in file names!
2011 
2012 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2013 
2014 	if (last == NULL) {
2015 		// this path is single segment with no '/' in it
2016 		// ex. "foo"
2017 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2018 			return B_NAME_TOO_LONG;
2019 
2020 		strcpy(path, ".");
2021 	} else {
2022 		last++;
2023 		if (last[0] == '\0') {
2024 			// special case: the path ends in one or more '/' - remove them
2025 			while (*--last == '/' && last != path);
2026 			last[1] = '\0';
2027 
2028 			if (last == path && last[0] == '/') {
2029 				// This path points to the root of the file system
2030 				strcpy(filename, ".");
2031 				return B_OK;
2032 			}
2033 			for (; last != path && *(last - 1) != '/'; last--);
2034 				// rewind to the start of the leaf before the '/'
2035 		}
2036 
2037 		// normal leaf: replace the leaf portion of the path with a '.'
2038 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2039 			return B_NAME_TOO_LONG;
2040 
2041 		last[0] = '.';
2042 		last[1] = '\0';
2043 	}
2044 	return B_OK;
2045 }
2046 
2047 
2048 static status_t
2049 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2050 	bool traverse, bool kernel, struct vnode** _vnode)
2051 {
2052 	char clonedName[B_FILE_NAME_LENGTH + 1];
2053 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2054 		return B_NAME_TOO_LONG;
2055 
2056 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2057 	struct vnode* directory;
2058 
2059 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2060 	if (status < 0)
2061 		return status;
2062 
2063 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2064 		_vnode, NULL);
2065 }
2066 
2067 
2068 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2069 	and returns the respective vnode.
2070 	On success a reference to the vnode is acquired for the caller.
2071 */
2072 static status_t
2073 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2074 {
2075 	ino_t id;
2076 
2077 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2078 		return get_vnode(dir->device, id, _vnode, true, false);
2079 
2080 	status_t status = FS_CALL(dir, lookup, name, &id);
2081 	if (status != B_OK)
2082 		return status;
2083 
2084 	// The lookup() hook call get_vnode() or publish_vnode(), so we do already
2085 	// have a reference and just need to look the node up.
2086 	rw_lock_read_lock(&sVnodeLock);
2087 	*_vnode = lookup_vnode(dir->device, id);
2088 	rw_lock_read_unlock(&sVnodeLock);
2089 
2090 	if (*_vnode == NULL) {
2091 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2092 			"0x%Lx)\n", dir->device, id);
2093 		return B_ENTRY_NOT_FOUND;
2094 	}
2095 
2096 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2097 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2098 //		(*_vnode)->mount->id, (*_vnode)->id);
2099 
2100 	return B_OK;
2101 }
2102 
2103 
2104 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2105 	\a path must not be NULL.
2106 	If it returns successfully, \a path contains the name of the last path
2107 	component. This function clobbers the buffer pointed to by \a path only
2108 	if it does contain more than one component.
2109 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2110 	it is successful or not!
2111 */
2112 static status_t
2113 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2114 	int count, struct io_context* ioContext, struct vnode** _vnode,
2115 	ino_t* _parentID)
2116 {
2117 	status_t status = B_OK;
2118 	ino_t lastParentID = vnode->id;
2119 
2120 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2121 
2122 	if (path == NULL) {
2123 		put_vnode(vnode);
2124 		return B_BAD_VALUE;
2125 	}
2126 
2127 	if (*path == '\0') {
2128 		put_vnode(vnode);
2129 		return B_ENTRY_NOT_FOUND;
2130 	}
2131 
2132 	while (true) {
2133 		struct vnode* nextVnode;
2134 		char* nextPath;
2135 
2136 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2137 			path));
2138 
2139 		// done?
2140 		if (path[0] == '\0')
2141 			break;
2142 
2143 		// walk to find the next path component ("path" will point to a single
2144 		// path component), and filter out multiple slashes
2145 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2146 				nextPath++);
2147 
2148 		if (*nextPath == '/') {
2149 			*nextPath = '\0';
2150 			do
2151 				nextPath++;
2152 			while (*nextPath == '/');
2153 		}
2154 
2155 		// See if the '..' is at a covering vnode move to the covered
2156 		// vnode so we pass the '..' path to the underlying filesystem.
2157 		// Also prevent breaking the root of the IO context.
2158 		if (strcmp("..", path) == 0) {
2159 			if (vnode == ioContext->root) {
2160 				// Attempted prison break! Keep it contained.
2161 				path = nextPath;
2162 				continue;
2163 			}
2164 
2165 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2166 				nextVnode = coveredVnode;
2167 				put_vnode(vnode);
2168 				vnode = nextVnode;
2169 			}
2170 		}
2171 
2172 		// check if vnode is really a directory
2173 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2174 			status = B_NOT_A_DIRECTORY;
2175 
2176 		// Check if we have the right to search the current directory vnode.
2177 		// If a file system doesn't have the access() function, we assume that
2178 		// searching a directory is always allowed
2179 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2180 			status = FS_CALL(vnode, access, X_OK);
2181 
2182 		// Tell the filesystem to get the vnode of this path component (if we
2183 		// got the permission from the call above)
2184 		if (status == B_OK)
2185 			status = lookup_dir_entry(vnode, path, &nextVnode);
2186 
2187 		if (status != B_OK) {
2188 			put_vnode(vnode);
2189 			return status;
2190 		}
2191 
2192 		// If the new node is a symbolic link, resolve it (if we've been told
2193 		// to do it)
2194 		if (S_ISLNK(nextVnode->Type())
2195 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2196 			size_t bufferSize;
2197 			char* buffer;
2198 
2199 			TRACE(("traverse link\n"));
2200 
2201 			// it's not exactly nice style using goto in this way, but hey,
2202 			// it works :-/
2203 			if (count + 1 > B_MAX_SYMLINKS) {
2204 				status = B_LINK_LIMIT;
2205 				goto resolve_link_error;
2206 			}
2207 
2208 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2209 			if (buffer == NULL) {
2210 				status = B_NO_MEMORY;
2211 				goto resolve_link_error;
2212 			}
2213 
2214 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2215 				bufferSize--;
2216 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2217 				// null-terminate
2218 				if (status >= 0)
2219 					buffer[bufferSize] = '\0';
2220 			} else
2221 				status = B_BAD_VALUE;
2222 
2223 			if (status != B_OK) {
2224 				free(buffer);
2225 
2226 		resolve_link_error:
2227 				put_vnode(vnode);
2228 				put_vnode(nextVnode);
2229 
2230 				return status;
2231 			}
2232 			put_vnode(nextVnode);
2233 
2234 			// Check if we start from the root directory or the current
2235 			// directory ("vnode" still points to that one).
2236 			// Cut off all leading slashes if it's the root directory
2237 			path = buffer;
2238 			bool absoluteSymlink = false;
2239 			if (path[0] == '/') {
2240 				// we don't need the old directory anymore
2241 				put_vnode(vnode);
2242 
2243 				while (*++path == '/')
2244 					;
2245 
2246 				mutex_lock(&sIOContextRootLock);
2247 				vnode = ioContext->root;
2248 				inc_vnode_ref_count(vnode);
2249 				mutex_unlock(&sIOContextRootLock);
2250 
2251 				absoluteSymlink = true;
2252 			}
2253 
2254 			inc_vnode_ref_count(vnode);
2255 				// balance the next recursion - we will decrement the
2256 				// ref_count of the vnode, no matter if we succeeded or not
2257 
2258 			if (absoluteSymlink && *path == '\0') {
2259 				// symlink was just "/"
2260 				nextVnode = vnode;
2261 			} else {
2262 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2263 					ioContext, &nextVnode, &lastParentID);
2264 			}
2265 
2266 			free(buffer);
2267 
2268 			if (status != B_OK) {
2269 				put_vnode(vnode);
2270 				return status;
2271 			}
2272 		} else
2273 			lastParentID = vnode->id;
2274 
2275 		// decrease the ref count on the old dir we just looked up into
2276 		put_vnode(vnode);
2277 
2278 		path = nextPath;
2279 		vnode = nextVnode;
2280 
2281 		// see if we hit a covered node
2282 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2283 			put_vnode(vnode);
2284 			vnode = coveringNode;
2285 		}
2286 	}
2287 
2288 	*_vnode = vnode;
2289 	if (_parentID)
2290 		*_parentID = lastParentID;
2291 
2292 	return B_OK;
2293 }
2294 
2295 
2296 static status_t
2297 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2298 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2299 {
2300 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2301 		get_current_io_context(kernel), _vnode, _parentID);
2302 }
2303 
2304 
2305 static status_t
2306 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2307 	ino_t* _parentID, bool kernel)
2308 {
2309 	struct vnode* start = NULL;
2310 
2311 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2312 
2313 	if (!path)
2314 		return B_BAD_VALUE;
2315 
2316 	if (*path == '\0')
2317 		return B_ENTRY_NOT_FOUND;
2318 
2319 	// figure out if we need to start at root or at cwd
2320 	if (*path == '/') {
2321 		if (sRoot == NULL) {
2322 			// we're a bit early, aren't we?
2323 			return B_ERROR;
2324 		}
2325 
2326 		while (*++path == '/')
2327 			;
2328 		start = get_root_vnode(kernel);
2329 
2330 		if (*path == '\0') {
2331 			*_vnode = start;
2332 			return B_OK;
2333 		}
2334 
2335 	} else {
2336 		struct io_context* context = get_current_io_context(kernel);
2337 
2338 		mutex_lock(&context->io_mutex);
2339 		start = context->cwd;
2340 		if (start != NULL)
2341 			inc_vnode_ref_count(start);
2342 		mutex_unlock(&context->io_mutex);
2343 
2344 		if (start == NULL)
2345 			return B_ERROR;
2346 	}
2347 
2348 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2349 		_parentID);
2350 }
2351 
2352 
2353 /*! Returns the vnode in the next to last segment of the path, and returns
2354 	the last portion in filename.
2355 	The path buffer must be able to store at least one additional character.
2356 */
2357 static status_t
2358 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2359 	bool kernel)
2360 {
2361 	status_t status = get_dir_path_and_leaf(path, filename);
2362 	if (status != B_OK)
2363 		return status;
2364 
2365 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2366 }
2367 
2368 
2369 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2370 		   to by a FD + path pair.
2371 
2372 	\a path must be given in either case. \a fd might be omitted, in which
2373 	case \a path is either an absolute path or one relative to the current
2374 	directory. If both a supplied and \a path is relative it is reckoned off
2375 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2376 	ignored.
2377 
2378 	The caller has the responsibility to call put_vnode() on the returned
2379 	directory vnode.
2380 
2381 	\param fd The FD. May be < 0.
2382 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2383 	       is modified by this function. It must have at least room for a
2384 	       string one character longer than the path it contains.
2385 	\param _vnode A pointer to a variable the directory vnode shall be written
2386 		   into.
2387 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2388 		   the leaf name of the specified entry will be written.
2389 	\param kernel \c true, if invoked from inside the kernel, \c false if
2390 		   invoked from userland.
2391 	\return \c B_OK, if everything went fine, another error code otherwise.
2392 */
2393 static status_t
2394 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2395 	char* filename, bool kernel)
2396 {
2397 	if (!path)
2398 		return B_BAD_VALUE;
2399 	if (*path == '\0')
2400 		return B_ENTRY_NOT_FOUND;
2401 	if (fd < 0)
2402 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2403 
2404 	status_t status = get_dir_path_and_leaf(path, filename);
2405 	if (status != B_OK)
2406 		return status;
2407 
2408 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2409 }
2410 
2411 
2412 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2413 		   to by a vnode + path pair.
2414 
2415 	\a path must be given in either case. \a vnode might be omitted, in which
2416 	case \a path is either an absolute path or one relative to the current
2417 	directory. If both a supplied and \a path is relative it is reckoned off
2418 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2419 	ignored.
2420 
2421 	The caller has the responsibility to call put_vnode() on the returned
2422 	directory vnode.
2423 
2424 	\param vnode The vnode. May be \c NULL.
2425 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2426 	       is modified by this function. It must have at least room for a
2427 	       string one character longer than the path it contains.
2428 	\param _vnode A pointer to a variable the directory vnode shall be written
2429 		   into.
2430 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2431 		   the leaf name of the specified entry will be written.
2432 	\param kernel \c true, if invoked from inside the kernel, \c false if
2433 		   invoked from userland.
2434 	\return \c B_OK, if everything went fine, another error code otherwise.
2435 */
2436 static status_t
2437 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2438 	struct vnode** _vnode, char* filename, bool kernel)
2439 {
2440 	if (!path)
2441 		return B_BAD_VALUE;
2442 	if (*path == '\0')
2443 		return B_ENTRY_NOT_FOUND;
2444 	if (vnode == NULL || path[0] == '/')
2445 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2446 
2447 	status_t status = get_dir_path_and_leaf(path, filename);
2448 	if (status != B_OK)
2449 		return status;
2450 
2451 	inc_vnode_ref_count(vnode);
2452 		// vnode_path_to_vnode() always decrements the ref count
2453 
2454 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2455 }
2456 
2457 
2458 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2459 */
2460 static status_t
2461 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2462 	size_t bufferSize, struct io_context* ioContext)
2463 {
2464 	if (bufferSize < sizeof(struct dirent))
2465 		return B_BAD_VALUE;
2466 
2467 	// See if the vnode is convering another vnode and move to the covered
2468 	// vnode so we get the underlying file system
2469 	VNodePutter vnodePutter;
2470 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2471 		vnode = coveredVnode;
2472 		vnodePutter.SetTo(vnode);
2473 	}
2474 
2475 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2476 		// The FS supports getting the name of a vnode.
2477 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2478 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2479 			return B_OK;
2480 	}
2481 
2482 	// The FS doesn't support getting the name of a vnode. So we search the
2483 	// parent directory for the vnode, if the caller let us.
2484 
2485 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2486 		return B_UNSUPPORTED;
2487 
2488 	void* cookie;
2489 
2490 	status_t status = FS_CALL(parent, open_dir, &cookie);
2491 	if (status >= B_OK) {
2492 		while (true) {
2493 			uint32 num = 1;
2494 			// We use the FS hook directly instead of dir_read(), since we don't
2495 			// want the entries to be fixed. We have already resolved vnode to
2496 			// the covered node.
2497 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2498 				&num);
2499 			if (status != B_OK)
2500 				break;
2501 			if (num == 0) {
2502 				status = B_ENTRY_NOT_FOUND;
2503 				break;
2504 			}
2505 
2506 			if (vnode->id == buffer->d_ino) {
2507 				// found correct entry!
2508 				break;
2509 			}
2510 		}
2511 
2512 		FS_CALL(vnode, close_dir, cookie);
2513 		FS_CALL(vnode, free_dir_cookie, cookie);
2514 	}
2515 	return status;
2516 }
2517 
2518 
2519 static status_t
2520 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2521 	size_t nameSize, bool kernel)
2522 {
2523 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2524 	struct dirent* dirent = (struct dirent*)buffer;
2525 
2526 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2527 		get_current_io_context(kernel));
2528 	if (status != B_OK)
2529 		return status;
2530 
2531 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2532 		return B_BUFFER_OVERFLOW;
2533 
2534 	return B_OK;
2535 }
2536 
2537 
2538 /*!	Gets the full path to a given directory vnode.
2539 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2540 	file system doesn't support this call, it will fall back to iterating
2541 	through the parent directory to get the name of the child.
2542 
2543 	To protect against circular loops, it supports a maximum tree depth
2544 	of 256 levels.
2545 
2546 	Note that the path may not be correct the time this function returns!
2547 	It doesn't use any locking to prevent returning the correct path, as
2548 	paths aren't safe anyway: the path to a file can change at any time.
2549 
2550 	It might be a good idea, though, to check if the returned path exists
2551 	in the calling function (it's not done here because of efficiency)
2552 */
2553 static status_t
2554 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2555 	bool kernel)
2556 {
2557 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2558 
2559 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2560 		return B_BAD_VALUE;
2561 
2562 	if (!S_ISDIR(vnode->Type()))
2563 		return B_NOT_A_DIRECTORY;
2564 
2565 	char* path = buffer;
2566 	int32 insert = bufferSize;
2567 	int32 maxLevel = 256;
2568 	int32 length;
2569 	status_t status;
2570 	struct io_context* ioContext = get_current_io_context(kernel);
2571 
2572 	// we don't use get_vnode() here because this call is more
2573 	// efficient and does all we need from get_vnode()
2574 	inc_vnode_ref_count(vnode);
2575 
2576 	if (vnode != ioContext->root) {
2577 		// we don't hit the IO context root
2578 		// resolve a vnode to its covered vnode
2579 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2580 			put_vnode(vnode);
2581 			vnode = coveredVnode;
2582 		}
2583 	}
2584 
2585 	path[--insert] = '\0';
2586 		// the path is filled right to left
2587 
2588 	while (true) {
2589 		// the name buffer is also used for fs_read_dir()
2590 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2591 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2592 		struct vnode* parentVnode;
2593 		ino_t parentID;
2594 
2595 		// lookup the parent vnode
2596 		if (vnode == ioContext->root) {
2597 			// we hit the IO context root
2598 			parentVnode = vnode;
2599 			inc_vnode_ref_count(vnode);
2600 		} else {
2601 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2602 			if (status != B_OK)
2603 				goto out;
2604 		}
2605 
2606 		// get the node's name
2607 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2608 			sizeof(nameBuffer), ioContext);
2609 
2610 		if (vnode != ioContext->root) {
2611 			// we don't hit the IO context root
2612 			// resolve a vnode to its covered vnode
2613 			if (Vnode* coveredVnode = get_covered_vnode(parentVnode)) {
2614 				put_vnode(parentVnode);
2615 				parentVnode = coveredVnode;
2616 				parentID = parentVnode->id;
2617 			}
2618 		}
2619 
2620 		bool hitRoot = (parentVnode == vnode);
2621 
2622 		// release the current vnode, we only need its parent from now on
2623 		put_vnode(vnode);
2624 		vnode = parentVnode;
2625 
2626 		if (status != B_OK)
2627 			goto out;
2628 
2629 		if (hitRoot) {
2630 			// we have reached "/", which means we have constructed the full
2631 			// path
2632 			break;
2633 		}
2634 
2635 		// TODO: add an explicit check for loops in about 10 levels to do
2636 		// real loop detection
2637 
2638 		// don't go deeper as 'maxLevel' to prevent circular loops
2639 		if (maxLevel-- < 0) {
2640 			status = B_LINK_LIMIT;
2641 			goto out;
2642 		}
2643 
2644 		// add the name in front of the current path
2645 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2646 		length = strlen(name);
2647 		insert -= length;
2648 		if (insert <= 0) {
2649 			status = B_RESULT_NOT_REPRESENTABLE;
2650 			goto out;
2651 		}
2652 		memcpy(path + insert, name, length);
2653 		path[--insert] = '/';
2654 	}
2655 
2656 	// the root dir will result in an empty path: fix it
2657 	if (path[insert] == '\0')
2658 		path[--insert] = '/';
2659 
2660 	TRACE(("  path is: %s\n", path + insert));
2661 
2662 	// move the path to the start of the buffer
2663 	length = bufferSize - insert;
2664 	memmove(buffer, path + insert, length);
2665 
2666 out:
2667 	put_vnode(vnode);
2668 	return status;
2669 }
2670 
2671 
2672 /*!	Checks the length of every path component, and adds a '.'
2673 	if the path ends in a slash.
2674 	The given path buffer must be able to store at least one
2675 	additional character.
2676 */
2677 static status_t
2678 check_path(char* to)
2679 {
2680 	int32 length = 0;
2681 
2682 	// check length of every path component
2683 
2684 	while (*to) {
2685 		char* begin;
2686 		if (*to == '/')
2687 			to++, length++;
2688 
2689 		begin = to;
2690 		while (*to != '/' && *to)
2691 			to++, length++;
2692 
2693 		if (to - begin > B_FILE_NAME_LENGTH)
2694 			return B_NAME_TOO_LONG;
2695 	}
2696 
2697 	if (length == 0)
2698 		return B_ENTRY_NOT_FOUND;
2699 
2700 	// complete path if there is a slash at the end
2701 
2702 	if (*(to - 1) == '/') {
2703 		if (length > B_PATH_NAME_LENGTH - 2)
2704 			return B_NAME_TOO_LONG;
2705 
2706 		to[0] = '.';
2707 		to[1] = '\0';
2708 	}
2709 
2710 	return B_OK;
2711 }
2712 
2713 
2714 static struct file_descriptor*
2715 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2716 {
2717 	struct file_descriptor* descriptor
2718 		= get_fd(get_current_io_context(kernel), fd);
2719 	if (descriptor == NULL)
2720 		return NULL;
2721 
2722 	struct vnode* vnode = fd_vnode(descriptor);
2723 	if (vnode == NULL) {
2724 		put_fd(descriptor);
2725 		return NULL;
2726 	}
2727 
2728 	// ToDo: when we can close a file descriptor at any point, investigate
2729 	//	if this is still valid to do (accessing the vnode without ref_count
2730 	//	or locking)
2731 	*_vnode = vnode;
2732 	return descriptor;
2733 }
2734 
2735 
2736 static struct vnode*
2737 get_vnode_from_fd(int fd, bool kernel)
2738 {
2739 	struct file_descriptor* descriptor;
2740 	struct vnode* vnode;
2741 
2742 	descriptor = get_fd(get_current_io_context(kernel), fd);
2743 	if (descriptor == NULL)
2744 		return NULL;
2745 
2746 	vnode = fd_vnode(descriptor);
2747 	if (vnode != NULL)
2748 		inc_vnode_ref_count(vnode);
2749 
2750 	put_fd(descriptor);
2751 	return vnode;
2752 }
2753 
2754 
2755 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2756 	only the path will be considered. In this case, the \a path must not be
2757 	NULL.
2758 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2759 	and should be NULL for files.
2760 */
2761 static status_t
2762 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2763 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2764 {
2765 	if (fd < 0 && !path)
2766 		return B_BAD_VALUE;
2767 
2768 	if (path != NULL && *path == '\0')
2769 		return B_ENTRY_NOT_FOUND;
2770 
2771 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2772 		// no FD or absolute path
2773 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2774 	}
2775 
2776 	// FD only, or FD + relative path
2777 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2778 	if (!vnode)
2779 		return B_FILE_ERROR;
2780 
2781 	if (path != NULL) {
2782 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2783 			_vnode, _parentID);
2784 	}
2785 
2786 	// there is no relative path to take into account
2787 
2788 	*_vnode = vnode;
2789 	if (_parentID)
2790 		*_parentID = -1;
2791 
2792 	return B_OK;
2793 }
2794 
2795 
2796 static int
2797 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2798 	void* cookie, int openMode, bool kernel)
2799 {
2800 	struct file_descriptor* descriptor;
2801 	int fd;
2802 
2803 	// If the vnode is locked, we don't allow creating a new file/directory
2804 	// file_descriptor for it
2805 	if (vnode && vnode->mandatory_locked_by != NULL
2806 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2807 		return B_BUSY;
2808 
2809 	descriptor = alloc_fd();
2810 	if (!descriptor)
2811 		return B_NO_MEMORY;
2812 
2813 	if (vnode)
2814 		descriptor->u.vnode = vnode;
2815 	else
2816 		descriptor->u.mount = mount;
2817 	descriptor->cookie = cookie;
2818 
2819 	switch (type) {
2820 		// vnode types
2821 		case FDTYPE_FILE:
2822 			descriptor->ops = &sFileOps;
2823 			break;
2824 		case FDTYPE_DIR:
2825 			descriptor->ops = &sDirectoryOps;
2826 			break;
2827 		case FDTYPE_ATTR:
2828 			descriptor->ops = &sAttributeOps;
2829 			break;
2830 		case FDTYPE_ATTR_DIR:
2831 			descriptor->ops = &sAttributeDirectoryOps;
2832 			break;
2833 
2834 		// mount types
2835 		case FDTYPE_INDEX_DIR:
2836 			descriptor->ops = &sIndexDirectoryOps;
2837 			break;
2838 		case FDTYPE_QUERY:
2839 			descriptor->ops = &sQueryOps;
2840 			break;
2841 
2842 		default:
2843 			panic("get_new_fd() called with unknown type %d\n", type);
2844 			break;
2845 	}
2846 	descriptor->type = type;
2847 	descriptor->open_mode = openMode;
2848 
2849 	io_context* context = get_current_io_context(kernel);
2850 	fd = new_fd(context, descriptor);
2851 	if (fd < 0) {
2852 		free(descriptor);
2853 		return B_NO_MORE_FDS;
2854 	}
2855 
2856 	mutex_lock(&context->io_mutex);
2857 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2858 	mutex_unlock(&context->io_mutex);
2859 
2860 	return fd;
2861 }
2862 
2863 
2864 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2865 	vfs_normalize_path(). See there for more documentation.
2866 */
2867 static status_t
2868 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2869 {
2870 	VNodePutter dirPutter;
2871 	struct vnode* dir = NULL;
2872 	status_t error;
2873 
2874 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2875 		// get dir vnode + leaf name
2876 		struct vnode* nextDir;
2877 		char leaf[B_FILE_NAME_LENGTH];
2878 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2879 		if (error != B_OK)
2880 			return error;
2881 
2882 		dir = nextDir;
2883 		strcpy(path, leaf);
2884 		dirPutter.SetTo(dir);
2885 
2886 		// get file vnode, if we shall resolve links
2887 		bool fileExists = false;
2888 		struct vnode* fileVnode;
2889 		VNodePutter fileVnodePutter;
2890 		if (traverseLink) {
2891 			inc_vnode_ref_count(dir);
2892 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2893 					NULL) == B_OK) {
2894 				fileVnodePutter.SetTo(fileVnode);
2895 				fileExists = true;
2896 			}
2897 		}
2898 
2899 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2900 			// we're done -- construct the path
2901 			bool hasLeaf = true;
2902 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2903 				// special cases "." and ".." -- get the dir, forget the leaf
2904 				inc_vnode_ref_count(dir);
2905 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2906 					&nextDir, NULL);
2907 				if (error != B_OK)
2908 					return error;
2909 				dir = nextDir;
2910 				dirPutter.SetTo(dir);
2911 				hasLeaf = false;
2912 			}
2913 
2914 			// get the directory path
2915 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2916 			if (error != B_OK)
2917 				return error;
2918 
2919 			// append the leaf name
2920 			if (hasLeaf) {
2921 				// insert a directory separator if this is not the file system
2922 				// root
2923 				if ((strcmp(path, "/") != 0
2924 					&& strlcat(path, "/", pathSize) >= pathSize)
2925 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2926 					return B_NAME_TOO_LONG;
2927 				}
2928 			}
2929 
2930 			return B_OK;
2931 		}
2932 
2933 		// read link
2934 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2935 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2936 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2937 			if (error != B_OK)
2938 				return error;
2939 			path[bufferSize] = '\0';
2940 		} else
2941 			return B_BAD_VALUE;
2942 	}
2943 
2944 	return B_LINK_LIMIT;
2945 }
2946 
2947 
2948 #ifdef ADD_DEBUGGER_COMMANDS
2949 
2950 
2951 static void
2952 _dump_advisory_locking(advisory_locking* locking)
2953 {
2954 	if (locking == NULL)
2955 		return;
2956 
2957 	kprintf("   lock:        %ld", locking->lock);
2958 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2959 
2960 	int32 index = 0;
2961 	LockList::Iterator iterator = locking->locks.GetIterator();
2962 	while (iterator.HasNext()) {
2963 		struct advisory_lock* lock = iterator.Next();
2964 
2965 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2966 		kprintf("        start:  %Ld\n", lock->start);
2967 		kprintf("        end:    %Ld\n", lock->end);
2968 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2969 	}
2970 }
2971 
2972 
2973 static void
2974 _dump_mount(struct fs_mount* mount)
2975 {
2976 	kprintf("MOUNT: %p\n", mount);
2977 	kprintf(" id:            %ld\n", mount->id);
2978 	kprintf(" device_name:   %s\n", mount->device_name);
2979 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2980 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2981 	kprintf(" partition:     %p\n", mount->partition);
2982 	kprintf(" lock:          %p\n", &mount->rlock);
2983 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2984 		mount->owns_file_device ? " owns_file_device" : "");
2985 
2986 	fs_volume* volume = mount->volume;
2987 	while (volume != NULL) {
2988 		kprintf(" volume %p:\n", volume);
2989 		kprintf("  layer:            %ld\n", volume->layer);
2990 		kprintf("  private_volume:   %p\n", volume->private_volume);
2991 		kprintf("  ops:              %p\n", volume->ops);
2992 		kprintf("  file_system:      %p\n", volume->file_system);
2993 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2994 		volume = volume->super_volume;
2995 	}
2996 
2997 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2998 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2999 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3000 	set_debug_variable("_partition", (addr_t)mount->partition);
3001 }
3002 
3003 
3004 static bool
3005 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3006 	const char* name)
3007 {
3008 	bool insertSlash = buffer[bufferSize] != '\0';
3009 	size_t nameLength = strlen(name);
3010 
3011 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3012 		return false;
3013 
3014 	if (insertSlash)
3015 		buffer[--bufferSize] = '/';
3016 
3017 	bufferSize -= nameLength;
3018 	memcpy(buffer + bufferSize, name, nameLength);
3019 
3020 	return true;
3021 }
3022 
3023 
3024 static bool
3025 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3026 	ino_t nodeID)
3027 {
3028 	if (bufferSize == 0)
3029 		return false;
3030 
3031 	bool insertSlash = buffer[bufferSize] != '\0';
3032 	if (insertSlash)
3033 		buffer[--bufferSize] = '/';
3034 
3035 	size_t size = snprintf(buffer, bufferSize,
3036 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3037 	if (size > bufferSize) {
3038 		if (insertSlash)
3039 			bufferSize++;
3040 		return false;
3041 	}
3042 
3043 	if (size < bufferSize)
3044 		memmove(buffer + bufferSize - size, buffer, size);
3045 
3046 	bufferSize -= size;
3047 	return true;
3048 }
3049 
3050 
3051 static char*
3052 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3053 	bool& _truncated)
3054 {
3055 	// null-terminate the path
3056 	buffer[--bufferSize] = '\0';
3057 
3058 	while (true) {
3059 		while (vnode->covers != NULL)
3060 			vnode = vnode->covers;
3061 
3062 		if (vnode == sRoot) {
3063 			_truncated = bufferSize == 0;
3064 			if (!_truncated)
3065 				buffer[--bufferSize] = '/';
3066 			return buffer + bufferSize;
3067 		}
3068 
3069 		// resolve the name
3070 		ino_t dirID;
3071 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3072 			vnode->id, dirID);
3073 		if (name == NULL) {
3074 			// Failed to resolve the name -- prepend "<dev,node>/".
3075 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3076 				vnode->mount->id, vnode->id);
3077 			return buffer + bufferSize;
3078 		}
3079 
3080 		// prepend the name
3081 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3082 			_truncated = true;
3083 			return buffer + bufferSize;
3084 		}
3085 
3086 		// resolve the directory node
3087 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3088 		if (nextVnode == NULL) {
3089 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3090 				vnode->mount->id, dirID);
3091 			return buffer + bufferSize;
3092 		}
3093 
3094 		vnode = nextVnode;
3095 	}
3096 }
3097 
3098 
3099 static void
3100 _dump_vnode(struct vnode* vnode, bool printPath)
3101 {
3102 	kprintf("VNODE: %p\n", vnode);
3103 	kprintf(" device:        %ld\n", vnode->device);
3104 	kprintf(" id:            %Ld\n", vnode->id);
3105 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3106 	kprintf(" private_node:  %p\n", vnode->private_node);
3107 	kprintf(" mount:         %p\n", vnode->mount);
3108 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3109 	kprintf(" covers:        %p\n", vnode->covers);
3110 	kprintf(" cache:         %p\n", vnode->cache);
3111 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3112 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3113 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3114 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3115 
3116 	_dump_advisory_locking(vnode->advisory_locking);
3117 
3118 	if (printPath) {
3119 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3120 		if (buffer != NULL) {
3121 			bool truncated;
3122 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3123 				B_PATH_NAME_LENGTH, truncated);
3124 			if (path != NULL) {
3125 				kprintf(" path:          ");
3126 				if (truncated)
3127 					kputs("<truncated>/");
3128 				kputs(path);
3129 				kputs("\n");
3130 			} else
3131 				kprintf("Failed to resolve vnode path.\n");
3132 
3133 			debug_free(buffer);
3134 		} else
3135 			kprintf("Failed to allocate memory for constructing the path.\n");
3136 	}
3137 
3138 	set_debug_variable("_node", (addr_t)vnode->private_node);
3139 	set_debug_variable("_mount", (addr_t)vnode->mount);
3140 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3141 	set_debug_variable("_covers", (addr_t)vnode->covers);
3142 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3143 }
3144 
3145 
3146 static int
3147 dump_mount(int argc, char** argv)
3148 {
3149 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3150 		kprintf("usage: %s [id|address]\n", argv[0]);
3151 		return 0;
3152 	}
3153 
3154 	uint32 id = parse_expression(argv[1]);
3155 	struct fs_mount* mount = NULL;
3156 
3157 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3158 	if (mount == NULL) {
3159 		if (IS_USER_ADDRESS(id)) {
3160 			kprintf("fs_mount not found\n");
3161 			return 0;
3162 		}
3163 		mount = (fs_mount*)id;
3164 	}
3165 
3166 	_dump_mount(mount);
3167 	return 0;
3168 }
3169 
3170 
3171 static int
3172 dump_mounts(int argc, char** argv)
3173 {
3174 	if (argc != 1) {
3175 		kprintf("usage: %s\n", argv[0]);
3176 		return 0;
3177 	}
3178 
3179 	kprintf("address     id root       covers     cookie     fs_name\n");
3180 
3181 	struct hash_iterator iterator;
3182 	struct fs_mount* mount;
3183 
3184 	hash_open(sMountsTable, &iterator);
3185 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3186 			!= NULL) {
3187 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3188 			mount->root_vnode->covers, mount->volume->private_volume,
3189 			mount->volume->file_system_name);
3190 
3191 		fs_volume* volume = mount->volume;
3192 		while (volume->super_volume != NULL) {
3193 			volume = volume->super_volume;
3194 			kprintf("                                     %p %s\n",
3195 				volume->private_volume, volume->file_system_name);
3196 		}
3197 	}
3198 
3199 	hash_close(sMountsTable, &iterator, false);
3200 	return 0;
3201 }
3202 
3203 
3204 static int
3205 dump_vnode(int argc, char** argv)
3206 {
3207 	bool printPath = false;
3208 	int argi = 1;
3209 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3210 		printPath = true;
3211 		argi++;
3212 	}
3213 
3214 	if (argi >= argc || argi + 2 < argc) {
3215 		print_debugger_command_usage(argv[0]);
3216 		return 0;
3217 	}
3218 
3219 	struct vnode* vnode = NULL;
3220 
3221 	if (argi + 1 == argc) {
3222 		vnode = (struct vnode*)parse_expression(argv[argi]);
3223 		if (IS_USER_ADDRESS(vnode)) {
3224 			kprintf("invalid vnode address\n");
3225 			return 0;
3226 		}
3227 		_dump_vnode(vnode, printPath);
3228 		return 0;
3229 	}
3230 
3231 	struct hash_iterator iterator;
3232 	dev_t device = parse_expression(argv[argi]);
3233 	ino_t id = parse_expression(argv[argi + 1]);
3234 
3235 	hash_open(sVnodeTable, &iterator);
3236 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3237 		if (vnode->id != id || vnode->device != device)
3238 			continue;
3239 
3240 		_dump_vnode(vnode, printPath);
3241 	}
3242 
3243 	hash_close(sVnodeTable, &iterator, false);
3244 	return 0;
3245 }
3246 
3247 
3248 static int
3249 dump_vnodes(int argc, char** argv)
3250 {
3251 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3252 		kprintf("usage: %s [device]\n", argv[0]);
3253 		return 0;
3254 	}
3255 
3256 	// restrict dumped nodes to a certain device if requested
3257 	dev_t device = parse_expression(argv[1]);
3258 
3259 	struct hash_iterator iterator;
3260 	struct vnode* vnode;
3261 
3262 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3263 		"flags\n");
3264 
3265 	hash_open(sVnodeTable, &iterator);
3266 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3267 		if (vnode->device != device)
3268 			continue;
3269 
3270 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3271 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3272 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3273 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3274 	}
3275 
3276 	hash_close(sVnodeTable, &iterator, false);
3277 	return 0;
3278 }
3279 
3280 
3281 static int
3282 dump_vnode_caches(int argc, char** argv)
3283 {
3284 	struct hash_iterator iterator;
3285 	struct vnode* vnode;
3286 
3287 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3288 		kprintf("usage: %s [device]\n", argv[0]);
3289 		return 0;
3290 	}
3291 
3292 	// restrict dumped nodes to a certain device if requested
3293 	dev_t device = -1;
3294 	if (argc > 1)
3295 		device = parse_expression(argv[1]);
3296 
3297 	kprintf("address    dev     inode cache          size   pages\n");
3298 
3299 	hash_open(sVnodeTable, &iterator);
3300 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3301 		if (vnode->cache == NULL)
3302 			continue;
3303 		if (device != -1 && vnode->device != device)
3304 			continue;
3305 
3306 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3307 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3308 				/ B_PAGE_SIZE, vnode->cache->page_count);
3309 	}
3310 
3311 	hash_close(sVnodeTable, &iterator, false);
3312 	return 0;
3313 }
3314 
3315 
3316 int
3317 dump_io_context(int argc, char** argv)
3318 {
3319 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3320 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3321 		return 0;
3322 	}
3323 
3324 	struct io_context* context = NULL;
3325 
3326 	if (argc > 1) {
3327 		uint32 num = parse_expression(argv[1]);
3328 		if (IS_KERNEL_ADDRESS(num))
3329 			context = (struct io_context*)num;
3330 		else {
3331 			Team* team = team_get_team_struct_locked(num);
3332 			if (team == NULL) {
3333 				kprintf("could not find team with ID %ld\n", num);
3334 				return 0;
3335 			}
3336 			context = (struct io_context*)team->io_context;
3337 		}
3338 	} else
3339 		context = get_current_io_context(true);
3340 
3341 	kprintf("I/O CONTEXT: %p\n", context);
3342 	kprintf(" root vnode:\t%p\n", context->root);
3343 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3344 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3345 	kprintf(" max fds:\t%lu\n", context->table_size);
3346 
3347 	if (context->num_used_fds)
3348 		kprintf("   no.  type         ops  ref  open  mode         pos"
3349 			"      cookie\n");
3350 
3351 	for (uint32 i = 0; i < context->table_size; i++) {
3352 		struct file_descriptor* fd = context->fds[i];
3353 		if (fd == NULL)
3354 			continue;
3355 
3356 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3357 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3358 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3359 			fd->pos, fd->cookie,
3360 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3361 				? "mount" : "vnode",
3362 			fd->u.vnode);
3363 	}
3364 
3365 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3366 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3367 
3368 	set_debug_variable("_cwd", (addr_t)context->cwd);
3369 
3370 	return 0;
3371 }
3372 
3373 
3374 int
3375 dump_vnode_usage(int argc, char** argv)
3376 {
3377 	if (argc != 1) {
3378 		kprintf("usage: %s\n", argv[0]);
3379 		return 0;
3380 	}
3381 
3382 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3383 		kMaxUnusedVnodes);
3384 
3385 	struct hash_iterator iterator;
3386 	hash_open(sVnodeTable, &iterator);
3387 
3388 	uint32 count = 0;
3389 	struct vnode* vnode;
3390 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3391 		count++;
3392 	}
3393 
3394 	hash_close(sVnodeTable, &iterator, false);
3395 
3396 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3397 	return 0;
3398 }
3399 
3400 #endif	// ADD_DEBUGGER_COMMANDS
3401 
3402 /*!	Clears an iovec array of physical pages.
3403 	Returns in \a _bytes the number of bytes successfully cleared.
3404 */
3405 static status_t
3406 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3407 {
3408 	size_t bytes = *_bytes;
3409 	size_t index = 0;
3410 
3411 	while (bytes > 0) {
3412 		size_t length = min_c(vecs[index].iov_len, bytes);
3413 
3414 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3415 			length);
3416 		if (status != B_OK) {
3417 			*_bytes -= bytes;
3418 			return status;
3419 		}
3420 
3421 		bytes -= length;
3422 	}
3423 
3424 	return B_OK;
3425 }
3426 
3427 
3428 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3429 	and calls the file system hooks to read/write the request to disk.
3430 */
3431 static status_t
3432 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3433 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3434 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3435 	bool doWrite)
3436 {
3437 	if (fileVecCount == 0) {
3438 		// There are no file vecs at this offset, so we're obviously trying
3439 		// to access the file outside of its bounds
3440 		return B_BAD_VALUE;
3441 	}
3442 
3443 	size_t numBytes = *_numBytes;
3444 	uint32 fileVecIndex;
3445 	size_t vecOffset = *_vecOffset;
3446 	uint32 vecIndex = *_vecIndex;
3447 	status_t status;
3448 	size_t size;
3449 
3450 	if (!doWrite && vecOffset == 0) {
3451 		// now directly read the data from the device
3452 		// the first file_io_vec can be read directly
3453 
3454 		if (fileVecs[0].length < numBytes)
3455 			size = fileVecs[0].length;
3456 		else
3457 			size = numBytes;
3458 
3459 		if (fileVecs[0].offset >= 0) {
3460 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3461 				&vecs[vecIndex], vecCount - vecIndex, &size);
3462 		} else {
3463 			// sparse read
3464 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3465 		}
3466 		if (status != B_OK)
3467 			return status;
3468 
3469 		// TODO: this is a work-around for buggy device drivers!
3470 		//	When our own drivers honour the length, we can:
3471 		//	a) also use this direct I/O for writes (otherwise, it would
3472 		//	   overwrite precious data)
3473 		//	b) panic if the term below is true (at least for writes)
3474 		if (size > fileVecs[0].length) {
3475 			//dprintf("warning: device driver %p doesn't respect total length "
3476 			//	"in read_pages() call!\n", ref->device);
3477 			size = fileVecs[0].length;
3478 		}
3479 
3480 		ASSERT(size <= fileVecs[0].length);
3481 
3482 		// If the file portion was contiguous, we're already done now
3483 		if (size == numBytes)
3484 			return B_OK;
3485 
3486 		// if we reached the end of the file, we can return as well
3487 		if (size != fileVecs[0].length) {
3488 			*_numBytes = size;
3489 			return B_OK;
3490 		}
3491 
3492 		fileVecIndex = 1;
3493 
3494 		// first, find out where we have to continue in our iovecs
3495 		for (; vecIndex < vecCount; vecIndex++) {
3496 			if (size < vecs[vecIndex].iov_len)
3497 				break;
3498 
3499 			size -= vecs[vecIndex].iov_len;
3500 		}
3501 
3502 		vecOffset = size;
3503 	} else {
3504 		fileVecIndex = 0;
3505 		size = 0;
3506 	}
3507 
3508 	// Too bad, let's process the rest of the file_io_vecs
3509 
3510 	size_t totalSize = size;
3511 	size_t bytesLeft = numBytes - size;
3512 
3513 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3514 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3515 		off_t fileOffset = fileVec.offset;
3516 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3517 
3518 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3519 
3520 		// process the complete fileVec
3521 		while (fileLeft > 0) {
3522 			iovec tempVecs[MAX_TEMP_IO_VECS];
3523 			uint32 tempCount = 0;
3524 
3525 			// size tracks how much of what is left of the current fileVec
3526 			// (fileLeft) has been assigned to tempVecs
3527 			size = 0;
3528 
3529 			// assign what is left of the current fileVec to the tempVecs
3530 			for (size = 0; size < fileLeft && vecIndex < vecCount
3531 					&& tempCount < MAX_TEMP_IO_VECS;) {
3532 				// try to satisfy one iovec per iteration (or as much as
3533 				// possible)
3534 
3535 				// bytes left of the current iovec
3536 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3537 				if (vecLeft == 0) {
3538 					vecOffset = 0;
3539 					vecIndex++;
3540 					continue;
3541 				}
3542 
3543 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3544 					vecIndex, vecOffset, size));
3545 
3546 				// actually available bytes
3547 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3548 
3549 				tempVecs[tempCount].iov_base
3550 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3551 				tempVecs[tempCount].iov_len = tempVecSize;
3552 				tempCount++;
3553 
3554 				size += tempVecSize;
3555 				vecOffset += tempVecSize;
3556 			}
3557 
3558 			size_t bytes = size;
3559 
3560 			if (fileOffset == -1) {
3561 				if (doWrite) {
3562 					panic("sparse write attempt: vnode %p", vnode);
3563 					status = B_IO_ERROR;
3564 				} else {
3565 					// sparse read
3566 					status = zero_pages(tempVecs, tempCount, &bytes);
3567 				}
3568 			} else if (doWrite) {
3569 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3570 					tempVecs, tempCount, &bytes);
3571 			} else {
3572 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3573 					tempVecs, tempCount, &bytes);
3574 			}
3575 			if (status != B_OK)
3576 				return status;
3577 
3578 			totalSize += bytes;
3579 			bytesLeft -= size;
3580 			if (fileOffset >= 0)
3581 				fileOffset += size;
3582 			fileLeft -= size;
3583 			//dprintf("-> file left = %Lu\n", fileLeft);
3584 
3585 			if (size != bytes || vecIndex >= vecCount) {
3586 				// there are no more bytes or iovecs, let's bail out
3587 				*_numBytes = totalSize;
3588 				return B_OK;
3589 			}
3590 		}
3591 	}
3592 
3593 	*_vecIndex = vecIndex;
3594 	*_vecOffset = vecOffset;
3595 	*_numBytes = totalSize;
3596 	return B_OK;
3597 }
3598 
3599 
3600 //	#pragma mark - public API for file systems
3601 
3602 
3603 extern "C" status_t
3604 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3605 	fs_vnode_ops* ops)
3606 {
3607 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3608 		volume, volume->id, vnodeID, privateNode));
3609 
3610 	if (privateNode == NULL)
3611 		return B_BAD_VALUE;
3612 
3613 	// create the node
3614 	bool nodeCreated;
3615 	struct vnode* vnode;
3616 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3617 		nodeCreated);
3618 	if (status != B_OK)
3619 		return status;
3620 
3621 	WriteLocker nodeLocker(sVnodeLock, true);
3622 		// create_new_vnode_and_lock() has locked for us
3623 
3624 	// file system integrity check:
3625 	// test if the vnode already exists and bail out if this is the case!
3626 	if (!nodeCreated) {
3627 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3628 			volume->id, vnodeID, privateNode, vnode->private_node);
3629 		return B_ERROR;
3630 	}
3631 
3632 	vnode->private_node = privateNode;
3633 	vnode->ops = ops;
3634 	vnode->SetUnpublished(true);
3635 
3636 	TRACE(("returns: %s\n", strerror(status)));
3637 
3638 	return status;
3639 }
3640 
3641 
3642 extern "C" status_t
3643 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3644 	fs_vnode_ops* ops, int type, uint32 flags)
3645 {
3646 	FUNCTION(("publish_vnode()\n"));
3647 
3648 	WriteLocker locker(sVnodeLock);
3649 
3650 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3651 
3652 	bool nodeCreated = false;
3653 	if (vnode == NULL) {
3654 		if (privateNode == NULL)
3655 			return B_BAD_VALUE;
3656 
3657 		// create the node
3658 		locker.Unlock();
3659 			// create_new_vnode_and_lock() will re-lock for us on success
3660 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3661 			nodeCreated);
3662 		if (status != B_OK)
3663 			return status;
3664 
3665 		locker.SetTo(sVnodeLock, true);
3666 	}
3667 
3668 	if (nodeCreated) {
3669 		vnode->private_node = privateNode;
3670 		vnode->ops = ops;
3671 		vnode->SetUnpublished(true);
3672 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3673 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3674 		// already known, but not published
3675 	} else
3676 		return B_BAD_VALUE;
3677 
3678 	bool publishSpecialSubNode = false;
3679 
3680 	vnode->SetType(type);
3681 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3682 	publishSpecialSubNode = is_special_node_type(type)
3683 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3684 
3685 	status_t status = B_OK;
3686 
3687 	// create sub vnodes, if necessary
3688 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3689 		locker.Unlock();
3690 
3691 		fs_volume* subVolume = volume;
3692 		if (volume->sub_volume != NULL) {
3693 			while (status == B_OK && subVolume->sub_volume != NULL) {
3694 				subVolume = subVolume->sub_volume;
3695 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3696 					vnode);
3697 			}
3698 		}
3699 
3700 		if (status == B_OK && publishSpecialSubNode)
3701 			status = create_special_sub_node(vnode, flags);
3702 
3703 		if (status != B_OK) {
3704 			// error -- clean up the created sub vnodes
3705 			while (subVolume->super_volume != volume) {
3706 				subVolume = subVolume->super_volume;
3707 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3708 			}
3709 		}
3710 
3711 		if (status == B_OK) {
3712 			ReadLocker vnodesReadLocker(sVnodeLock);
3713 			AutoLocker<Vnode> nodeLocker(vnode);
3714 			vnode->SetBusy(false);
3715 			vnode->SetUnpublished(false);
3716 		} else {
3717 			locker.Lock();
3718 			hash_remove(sVnodeTable, vnode);
3719 			remove_vnode_from_mount_list(vnode, vnode->mount);
3720 			free(vnode);
3721 		}
3722 	} else {
3723 		// we still hold the write lock -- mark the node unbusy and published
3724 		vnode->SetBusy(false);
3725 		vnode->SetUnpublished(false);
3726 	}
3727 
3728 	TRACE(("returns: %s\n", strerror(status)));
3729 
3730 	return status;
3731 }
3732 
3733 
3734 extern "C" status_t
3735 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3736 {
3737 	struct vnode* vnode;
3738 
3739 	if (volume == NULL)
3740 		return B_BAD_VALUE;
3741 
3742 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3743 	if (status != B_OK)
3744 		return status;
3745 
3746 	// If this is a layered FS, we need to get the node cookie for the requested
3747 	// layer.
3748 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3749 		fs_vnode resolvedNode;
3750 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3751 			&resolvedNode);
3752 		if (status != B_OK) {
3753 			panic("get_vnode(): Failed to get super node for vnode %p, "
3754 				"volume: %p", vnode, volume);
3755 			put_vnode(vnode);
3756 			return status;
3757 		}
3758 
3759 		if (_privateNode != NULL)
3760 			*_privateNode = resolvedNode.private_node;
3761 	} else if (_privateNode != NULL)
3762 		*_privateNode = vnode->private_node;
3763 
3764 	return B_OK;
3765 }
3766 
3767 
3768 extern "C" status_t
3769 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3770 {
3771 	struct vnode* vnode;
3772 
3773 	rw_lock_read_lock(&sVnodeLock);
3774 	vnode = lookup_vnode(volume->id, vnodeID);
3775 	rw_lock_read_unlock(&sVnodeLock);
3776 
3777 	if (vnode == NULL)
3778 		return B_BAD_VALUE;
3779 
3780 	inc_vnode_ref_count(vnode);
3781 	return B_OK;
3782 }
3783 
3784 
3785 extern "C" status_t
3786 put_vnode(fs_volume* volume, ino_t vnodeID)
3787 {
3788 	struct vnode* vnode;
3789 
3790 	rw_lock_read_lock(&sVnodeLock);
3791 	vnode = lookup_vnode(volume->id, vnodeID);
3792 	rw_lock_read_unlock(&sVnodeLock);
3793 
3794 	if (vnode == NULL)
3795 		return B_BAD_VALUE;
3796 
3797 	dec_vnode_ref_count(vnode, false, true);
3798 	return B_OK;
3799 }
3800 
3801 
3802 extern "C" status_t
3803 remove_vnode(fs_volume* volume, ino_t vnodeID)
3804 {
3805 	ReadLocker locker(sVnodeLock);
3806 
3807 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3808 	if (vnode == NULL)
3809 		return B_ENTRY_NOT_FOUND;
3810 
3811 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3812 		// this vnode is in use
3813 		return B_BUSY;
3814 	}
3815 
3816 	vnode->Lock();
3817 
3818 	vnode->SetRemoved(true);
3819 	bool removeUnpublished = false;
3820 
3821 	if (vnode->IsUnpublished()) {
3822 		// prepare the vnode for deletion
3823 		removeUnpublished = true;
3824 		vnode->SetBusy(true);
3825 	}
3826 
3827 	vnode->Unlock();
3828 	locker.Unlock();
3829 
3830 	if (removeUnpublished) {
3831 		// If the vnode hasn't been published yet, we delete it here
3832 		atomic_add(&vnode->ref_count, -1);
3833 		free_vnode(vnode, true);
3834 	}
3835 
3836 	return B_OK;
3837 }
3838 
3839 
3840 extern "C" status_t
3841 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3842 {
3843 	struct vnode* vnode;
3844 
3845 	rw_lock_read_lock(&sVnodeLock);
3846 
3847 	vnode = lookup_vnode(volume->id, vnodeID);
3848 	if (vnode) {
3849 		AutoLocker<Vnode> nodeLocker(vnode);
3850 		vnode->SetRemoved(false);
3851 	}
3852 
3853 	rw_lock_read_unlock(&sVnodeLock);
3854 	return B_OK;
3855 }
3856 
3857 
3858 extern "C" status_t
3859 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3860 {
3861 	ReadLocker _(sVnodeLock);
3862 
3863 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3864 		if (_removed != NULL)
3865 			*_removed = vnode->IsRemoved();
3866 		return B_OK;
3867 	}
3868 
3869 	return B_BAD_VALUE;
3870 }
3871 
3872 
3873 extern "C" fs_volume*
3874 volume_for_vnode(fs_vnode* _vnode)
3875 {
3876 	if (_vnode == NULL)
3877 		return NULL;
3878 
3879 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3880 	return vnode->mount->volume;
3881 }
3882 
3883 
3884 #if 0
3885 extern "C" status_t
3886 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3887 	size_t* _numBytes)
3888 {
3889 	struct file_descriptor* descriptor;
3890 	struct vnode* vnode;
3891 
3892 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3893 	if (descriptor == NULL)
3894 		return B_FILE_ERROR;
3895 
3896 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3897 		count, 0, _numBytes);
3898 
3899 	put_fd(descriptor);
3900 	return status;
3901 }
3902 
3903 
3904 extern "C" status_t
3905 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3906 	size_t* _numBytes)
3907 {
3908 	struct file_descriptor* descriptor;
3909 	struct vnode* vnode;
3910 
3911 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3912 	if (descriptor == NULL)
3913 		return B_FILE_ERROR;
3914 
3915 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3916 		count, 0, _numBytes);
3917 
3918 	put_fd(descriptor);
3919 	return status;
3920 }
3921 #endif
3922 
3923 
3924 extern "C" status_t
3925 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3926 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3927 	size_t* _bytes)
3928 {
3929 	struct file_descriptor* descriptor;
3930 	struct vnode* vnode;
3931 
3932 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3933 	if (descriptor == NULL)
3934 		return B_FILE_ERROR;
3935 
3936 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3937 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3938 		false);
3939 
3940 	put_fd(descriptor);
3941 	return status;
3942 }
3943 
3944 
3945 extern "C" status_t
3946 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3947 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3948 	size_t* _bytes)
3949 {
3950 	struct file_descriptor* descriptor;
3951 	struct vnode* vnode;
3952 
3953 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3954 	if (descriptor == NULL)
3955 		return B_FILE_ERROR;
3956 
3957 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3958 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3959 		true);
3960 
3961 	put_fd(descriptor);
3962 	return status;
3963 }
3964 
3965 
3966 extern "C" status_t
3967 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3968 {
3969 	// lookup mount -- the caller is required to make sure that the mount
3970 	// won't go away
3971 	MutexLocker locker(sMountMutex);
3972 	struct fs_mount* mount = find_mount(mountID);
3973 	if (mount == NULL)
3974 		return B_BAD_VALUE;
3975 	locker.Unlock();
3976 
3977 	return mount->entry_cache.Add(dirID, name, nodeID);
3978 }
3979 
3980 
3981 extern "C" status_t
3982 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3983 {
3984 	// lookup mount -- the caller is required to make sure that the mount
3985 	// won't go away
3986 	MutexLocker locker(sMountMutex);
3987 	struct fs_mount* mount = find_mount(mountID);
3988 	if (mount == NULL)
3989 		return B_BAD_VALUE;
3990 	locker.Unlock();
3991 
3992 	return mount->entry_cache.Remove(dirID, name);
3993 }
3994 
3995 
3996 //	#pragma mark - private VFS API
3997 //	Functions the VFS exports for other parts of the kernel
3998 
3999 
4000 /*! Acquires another reference to the vnode that has to be released
4001 	by calling vfs_put_vnode().
4002 */
4003 void
4004 vfs_acquire_vnode(struct vnode* vnode)
4005 {
4006 	inc_vnode_ref_count(vnode);
4007 }
4008 
4009 
4010 /*! This is currently called from file_cache_create() only.
4011 	It's probably a temporary solution as long as devfs requires that
4012 	fs_read_pages()/fs_write_pages() are called with the standard
4013 	open cookie and not with a device cookie.
4014 	If that's done differently, remove this call; it has no other
4015 	purpose.
4016 */
4017 extern "C" status_t
4018 vfs_get_cookie_from_fd(int fd, void** _cookie)
4019 {
4020 	struct file_descriptor* descriptor;
4021 
4022 	descriptor = get_fd(get_current_io_context(true), fd);
4023 	if (descriptor == NULL)
4024 		return B_FILE_ERROR;
4025 
4026 	*_cookie = descriptor->cookie;
4027 	return B_OK;
4028 }
4029 
4030 
4031 extern "C" status_t
4032 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4033 {
4034 	*vnode = get_vnode_from_fd(fd, kernel);
4035 
4036 	if (*vnode == NULL)
4037 		return B_FILE_ERROR;
4038 
4039 	return B_NO_ERROR;
4040 }
4041 
4042 
4043 extern "C" status_t
4044 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4045 {
4046 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4047 		path, kernel));
4048 
4049 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4050 	if (pathBuffer.InitCheck() != B_OK)
4051 		return B_NO_MEMORY;
4052 
4053 	char* buffer = pathBuffer.LockBuffer();
4054 	strlcpy(buffer, path, pathBuffer.BufferSize());
4055 
4056 	struct vnode* vnode;
4057 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4058 	if (status != B_OK)
4059 		return status;
4060 
4061 	*_vnode = vnode;
4062 	return B_OK;
4063 }
4064 
4065 
4066 extern "C" status_t
4067 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4068 {
4069 	struct vnode* vnode;
4070 
4071 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4072 	if (status != B_OK)
4073 		return status;
4074 
4075 	*_vnode = vnode;
4076 	return B_OK;
4077 }
4078 
4079 
4080 extern "C" status_t
4081 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4082 	const char* name, struct vnode** _vnode)
4083 {
4084 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4085 }
4086 
4087 
4088 extern "C" void
4089 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4090 {
4091 	*_mountID = vnode->device;
4092 	*_vnodeID = vnode->id;
4093 }
4094 
4095 
4096 /*!
4097 	Helper function abstracting the process of "converting" a given
4098 	vnode-pointer to a fs_vnode-pointer.
4099 	Currently only used in bindfs.
4100 */
4101 extern "C" fs_vnode*
4102 vfs_fsnode_for_vnode(struct vnode* vnode)
4103 {
4104 	return vnode;
4105 }
4106 
4107 
4108 /*!
4109 	Calls fs_open() on the given vnode and returns a new
4110 	file descriptor for it
4111 */
4112 int
4113 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4114 {
4115 	return open_vnode(vnode, openMode, kernel);
4116 }
4117 
4118 
4119 /*!	Looks up a vnode with the given mount and vnode ID.
4120 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4121 	to the node.
4122 	It's currently only be used by file_cache_create().
4123 */
4124 extern "C" status_t
4125 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4126 {
4127 	rw_lock_read_lock(&sVnodeLock);
4128 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4129 	rw_lock_read_unlock(&sVnodeLock);
4130 
4131 	if (vnode == NULL)
4132 		return B_ERROR;
4133 
4134 	*_vnode = vnode;
4135 	return B_OK;
4136 }
4137 
4138 
4139 extern "C" status_t
4140 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4141 	bool traverseLeafLink, bool kernel, void** _node)
4142 {
4143 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4144 		volume, path, kernel));
4145 
4146 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4147 	if (pathBuffer.InitCheck() != B_OK)
4148 		return B_NO_MEMORY;
4149 
4150 	fs_mount* mount;
4151 	status_t status = get_mount(volume->id, &mount);
4152 	if (status != B_OK)
4153 		return status;
4154 
4155 	char* buffer = pathBuffer.LockBuffer();
4156 	strlcpy(buffer, path, pathBuffer.BufferSize());
4157 
4158 	struct vnode* vnode = mount->root_vnode;
4159 
4160 	if (buffer[0] == '/')
4161 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4162 	else {
4163 		inc_vnode_ref_count(vnode);
4164 			// vnode_path_to_vnode() releases a reference to the starting vnode
4165 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4166 			kernel, &vnode, NULL);
4167 	}
4168 
4169 	put_mount(mount);
4170 
4171 	if (status != B_OK)
4172 		return status;
4173 
4174 	if (vnode->device != volume->id) {
4175 		// wrong mount ID - must not gain access on foreign file system nodes
4176 		put_vnode(vnode);
4177 		return B_BAD_VALUE;
4178 	}
4179 
4180 	// Use get_vnode() to resolve the cookie for the right layer.
4181 	status = get_vnode(volume, vnode->id, _node);
4182 	put_vnode(vnode);
4183 
4184 	return status;
4185 }
4186 
4187 
4188 status_t
4189 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4190 	struct stat* stat, bool kernel)
4191 {
4192 	status_t status;
4193 
4194 	if (path) {
4195 		// path given: get the stat of the node referred to by (fd, path)
4196 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4197 		if (pathBuffer.InitCheck() != B_OK)
4198 			return B_NO_MEMORY;
4199 
4200 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4201 			traverseLeafLink, stat, kernel);
4202 	} else {
4203 		// no path given: get the FD and use the FD operation
4204 		struct file_descriptor* descriptor
4205 			= get_fd(get_current_io_context(kernel), fd);
4206 		if (descriptor == NULL)
4207 			return B_FILE_ERROR;
4208 
4209 		if (descriptor->ops->fd_read_stat)
4210 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4211 		else
4212 			status = B_UNSUPPORTED;
4213 
4214 		put_fd(descriptor);
4215 	}
4216 
4217 	return status;
4218 }
4219 
4220 
4221 /*!	Finds the full path to the file that contains the module \a moduleName,
4222 	puts it into \a pathBuffer, and returns B_OK for success.
4223 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4224 	\c B_ENTRY_NOT_FOUNT if no file could be found.
4225 	\a pathBuffer is clobbered in any case and must not be relied on if this
4226 	functions returns unsuccessfully.
4227 	\a basePath and \a pathBuffer must not point to the same space.
4228 */
4229 status_t
4230 vfs_get_module_path(const char* basePath, const char* moduleName,
4231 	char* pathBuffer, size_t bufferSize)
4232 {
4233 	struct vnode* dir;
4234 	struct vnode* file;
4235 	status_t status;
4236 	size_t length;
4237 	char* path;
4238 
4239 	if (bufferSize == 0
4240 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4241 		return B_BUFFER_OVERFLOW;
4242 
4243 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4244 	if (status != B_OK)
4245 		return status;
4246 
4247 	// the path buffer had been clobbered by the above call
4248 	length = strlcpy(pathBuffer, basePath, bufferSize);
4249 	if (pathBuffer[length - 1] != '/')
4250 		pathBuffer[length++] = '/';
4251 
4252 	path = pathBuffer + length;
4253 	bufferSize -= length;
4254 
4255 	while (moduleName) {
4256 		char* nextPath = strchr(moduleName, '/');
4257 		if (nextPath == NULL)
4258 			length = strlen(moduleName);
4259 		else {
4260 			length = nextPath - moduleName;
4261 			nextPath++;
4262 		}
4263 
4264 		if (length + 1 >= bufferSize) {
4265 			status = B_BUFFER_OVERFLOW;
4266 			goto err;
4267 		}
4268 
4269 		memcpy(path, moduleName, length);
4270 		path[length] = '\0';
4271 		moduleName = nextPath;
4272 
4273 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4274 		if (status != B_OK) {
4275 			// vnode_path_to_vnode() has already released the reference to dir
4276 			return status;
4277 		}
4278 
4279 		if (S_ISDIR(file->Type())) {
4280 			// goto the next directory
4281 			path[length] = '/';
4282 			path[length + 1] = '\0';
4283 			path += length + 1;
4284 			bufferSize -= length + 1;
4285 
4286 			dir = file;
4287 		} else if (S_ISREG(file->Type())) {
4288 			// it's a file so it should be what we've searched for
4289 			put_vnode(file);
4290 
4291 			return B_OK;
4292 		} else {
4293 			TRACE(("vfs_get_module_path(): something is strange here: "
4294 				"0x%08lx...\n", file->Type()));
4295 			status = B_ERROR;
4296 			dir = file;
4297 			goto err;
4298 		}
4299 	}
4300 
4301 	// if we got here, the moduleName just pointed to a directory, not to
4302 	// a real module - what should we do in this case?
4303 	status = B_ENTRY_NOT_FOUND;
4304 
4305 err:
4306 	put_vnode(dir);
4307 	return status;
4308 }
4309 
4310 
4311 /*!	\brief Normalizes a given path.
4312 
4313 	The path must refer to an existing or non-existing entry in an existing
4314 	directory, that is chopping off the leaf component the remaining path must
4315 	refer to an existing directory.
4316 
4317 	The returned will be canonical in that it will be absolute, will not
4318 	contain any "." or ".." components or duplicate occurrences of '/'s,
4319 	and none of the directory components will by symbolic links.
4320 
4321 	Any two paths referring to the same entry, will result in the same
4322 	normalized path (well, that is pretty much the definition of `normalized',
4323 	isn't it :-).
4324 
4325 	\param path The path to be normalized.
4326 	\param buffer The buffer into which the normalized path will be written.
4327 		   May be the same one as \a path.
4328 	\param bufferSize The size of \a buffer.
4329 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4330 	\param kernel \c true, if the IO context of the kernel shall be used,
4331 		   otherwise that of the team this thread belongs to. Only relevant,
4332 		   if the path is relative (to get the CWD).
4333 	\return \c B_OK if everything went fine, another error code otherwise.
4334 */
4335 status_t
4336 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4337 	bool traverseLink, bool kernel)
4338 {
4339 	if (!path || !buffer || bufferSize < 1)
4340 		return B_BAD_VALUE;
4341 
4342 	if (path != buffer) {
4343 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4344 			return B_BUFFER_OVERFLOW;
4345 	}
4346 
4347 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4348 }
4349 
4350 
4351 /*!	\brief Creates a special node in the file system.
4352 
4353 	The caller gets a reference to the newly created node (which is passed
4354 	back through \a _createdVnode) and is responsible for releasing it.
4355 
4356 	\param path The path where to create the entry for the node. Can be \c NULL,
4357 		in which case the node is created without an entry in the root FS -- it
4358 		will automatically be deleted when the last reference has been released.
4359 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4360 		the target file system will just create the node with its standard
4361 		operations. Depending on the type of the node a subnode might be created
4362 		automatically, though.
4363 	\param mode The type and permissions for the node to be created.
4364 	\param flags Flags to be passed to the creating FS.
4365 	\param kernel \c true, if called in the kernel context (relevant only if
4366 		\a path is not \c NULL and not absolute).
4367 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4368 		file system creating the node, with the private data pointer and
4369 		operations for the super node. Can be \c NULL.
4370 	\param _createVnode Pointer to pre-allocated storage where to store the
4371 		pointer to the newly created node.
4372 	\return \c B_OK, if everything went fine, another error code otherwise.
4373 */
4374 status_t
4375 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4376 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4377 	struct vnode** _createdVnode)
4378 {
4379 	struct vnode* dirNode;
4380 	char _leaf[B_FILE_NAME_LENGTH];
4381 	char* leaf = NULL;
4382 
4383 	if (path) {
4384 		// We've got a path. Get the dir vnode and the leaf name.
4385 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4386 		if (tmpPathBuffer.InitCheck() != B_OK)
4387 			return B_NO_MEMORY;
4388 
4389 		char* tmpPath = tmpPathBuffer.LockBuffer();
4390 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4391 			return B_NAME_TOO_LONG;
4392 
4393 		// get the dir vnode and the leaf name
4394 		leaf = _leaf;
4395 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4396 		if (error != B_OK)
4397 			return error;
4398 	} else {
4399 		// No path. Create the node in the root FS.
4400 		dirNode = sRoot;
4401 		inc_vnode_ref_count(dirNode);
4402 	}
4403 
4404 	VNodePutter _(dirNode);
4405 
4406 	// check support for creating special nodes
4407 	if (!HAS_FS_CALL(dirNode, create_special_node))
4408 		return B_UNSUPPORTED;
4409 
4410 	// create the node
4411 	fs_vnode superVnode;
4412 	ino_t nodeID;
4413 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4414 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4415 	if (status != B_OK)
4416 		return status;
4417 
4418 	// lookup the node
4419 	rw_lock_read_lock(&sVnodeLock);
4420 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4421 	rw_lock_read_unlock(&sVnodeLock);
4422 
4423 	if (*_createdVnode == NULL) {
4424 		panic("vfs_create_special_node(): lookup of node failed");
4425 		return B_ERROR;
4426 	}
4427 
4428 	return B_OK;
4429 }
4430 
4431 
4432 extern "C" void
4433 vfs_put_vnode(struct vnode* vnode)
4434 {
4435 	put_vnode(vnode);
4436 }
4437 
4438 
4439 extern "C" status_t
4440 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4441 {
4442 	// Get current working directory from io context
4443 	struct io_context* context = get_current_io_context(false);
4444 	status_t status = B_OK;
4445 
4446 	mutex_lock(&context->io_mutex);
4447 
4448 	if (context->cwd != NULL) {
4449 		*_mountID = context->cwd->device;
4450 		*_vnodeID = context->cwd->id;
4451 	} else
4452 		status = B_ERROR;
4453 
4454 	mutex_unlock(&context->io_mutex);
4455 	return status;
4456 }
4457 
4458 
4459 status_t
4460 vfs_unmount(dev_t mountID, uint32 flags)
4461 {
4462 	return fs_unmount(NULL, mountID, flags, true);
4463 }
4464 
4465 
4466 extern "C" status_t
4467 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4468 {
4469 	struct vnode* vnode;
4470 
4471 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4472 	if (status != B_OK)
4473 		return status;
4474 
4475 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4476 	put_vnode(vnode);
4477 	return B_OK;
4478 }
4479 
4480 
4481 extern "C" void
4482 vfs_free_unused_vnodes(int32 level)
4483 {
4484 	vnode_low_resource_handler(NULL,
4485 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4486 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4487 		level);
4488 }
4489 
4490 
4491 extern "C" bool
4492 vfs_can_page(struct vnode* vnode, void* cookie)
4493 {
4494 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4495 
4496 	if (HAS_FS_CALL(vnode, can_page))
4497 		return FS_CALL(vnode, can_page, cookie);
4498 	return false;
4499 }
4500 
4501 
4502 extern "C" status_t
4503 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4504 	const generic_io_vec* vecs, size_t count, uint32 flags,
4505 	generic_size_t* _numBytes)
4506 {
4507 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4508 		pos));
4509 
4510 #if VFS_PAGES_IO_TRACING
4511 	generic_size_t bytesRequested = *_numBytes;
4512 #endif
4513 
4514 	IORequest request;
4515 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4516 	if (status == B_OK) {
4517 		status = vfs_vnode_io(vnode, cookie, &request);
4518 		if (status == B_OK)
4519 			status = request.Wait();
4520 		*_numBytes = request.TransferredBytes();
4521 	}
4522 
4523 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4524 		status, *_numBytes));
4525 
4526 	return status;
4527 }
4528 
4529 
4530 extern "C" status_t
4531 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4532 	const generic_io_vec* vecs, size_t count, uint32 flags,
4533 	generic_size_t* _numBytes)
4534 {
4535 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4536 		pos));
4537 
4538 #if VFS_PAGES_IO_TRACING
4539 	generic_size_t bytesRequested = *_numBytes;
4540 #endif
4541 
4542 	IORequest request;
4543 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4544 	if (status == B_OK) {
4545 		status = vfs_vnode_io(vnode, cookie, &request);
4546 		if (status == B_OK)
4547 			status = request.Wait();
4548 		*_numBytes = request.TransferredBytes();
4549 	}
4550 
4551 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4552 		status, *_numBytes));
4553 
4554 	return status;
4555 }
4556 
4557 
4558 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4559 	created if \a allocate is \c true.
4560 	In case it's successful, it will also grab a reference to the cache
4561 	it returns.
4562 */
4563 extern "C" status_t
4564 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4565 {
4566 	if (vnode->cache != NULL) {
4567 		vnode->cache->AcquireRef();
4568 		*_cache = vnode->cache;
4569 		return B_OK;
4570 	}
4571 
4572 	rw_lock_read_lock(&sVnodeLock);
4573 	vnode->Lock();
4574 
4575 	status_t status = B_OK;
4576 
4577 	// The cache could have been created in the meantime
4578 	if (vnode->cache == NULL) {
4579 		if (allocate) {
4580 			// TODO: actually the vnode needs to be busy already here, or
4581 			//	else this won't work...
4582 			bool wasBusy = vnode->IsBusy();
4583 			vnode->SetBusy(true);
4584 
4585 			vnode->Unlock();
4586 			rw_lock_read_unlock(&sVnodeLock);
4587 
4588 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4589 
4590 			rw_lock_read_lock(&sVnodeLock);
4591 			vnode->Lock();
4592 			vnode->SetBusy(wasBusy);
4593 		} else
4594 			status = B_BAD_VALUE;
4595 	}
4596 
4597 	vnode->Unlock();
4598 	rw_lock_read_unlock(&sVnodeLock);
4599 
4600 	if (status == B_OK) {
4601 		vnode->cache->AcquireRef();
4602 		*_cache = vnode->cache;
4603 	}
4604 
4605 	return status;
4606 }
4607 
4608 
4609 status_t
4610 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4611 	file_io_vec* vecs, size_t* _count)
4612 {
4613 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4614 		vnode, vecs, offset, size));
4615 
4616 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4617 }
4618 
4619 
4620 status_t
4621 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4622 {
4623 	status_t status = FS_CALL(vnode, read_stat, stat);
4624 
4625 	// fill in the st_dev and st_ino fields
4626 	if (status == B_OK) {
4627 		stat->st_dev = vnode->device;
4628 		stat->st_ino = vnode->id;
4629 		stat->st_rdev = -1;
4630 	}
4631 
4632 	return status;
4633 }
4634 
4635 
4636 status_t
4637 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4638 {
4639 	struct vnode* vnode;
4640 	status_t status = get_vnode(device, inode, &vnode, true, false);
4641 	if (status != B_OK)
4642 		return status;
4643 
4644 	status = FS_CALL(vnode, read_stat, stat);
4645 
4646 	// fill in the st_dev and st_ino fields
4647 	if (status == B_OK) {
4648 		stat->st_dev = vnode->device;
4649 		stat->st_ino = vnode->id;
4650 		stat->st_rdev = -1;
4651 	}
4652 
4653 	put_vnode(vnode);
4654 	return status;
4655 }
4656 
4657 
4658 status_t
4659 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4660 {
4661 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4662 }
4663 
4664 
4665 status_t
4666 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4667 	char* path, size_t pathLength)
4668 {
4669 	struct vnode* vnode;
4670 	status_t status;
4671 
4672 	// filter invalid leaf names
4673 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4674 		return B_BAD_VALUE;
4675 
4676 	// get the vnode matching the dir's node_ref
4677 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4678 		// special cases "." and "..": we can directly get the vnode of the
4679 		// referenced directory
4680 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4681 		leaf = NULL;
4682 	} else
4683 		status = get_vnode(device, inode, &vnode, true, false);
4684 	if (status != B_OK)
4685 		return status;
4686 
4687 	// get the directory path
4688 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4689 	put_vnode(vnode);
4690 		// we don't need the vnode anymore
4691 	if (status != B_OK)
4692 		return status;
4693 
4694 	// append the leaf name
4695 	if (leaf) {
4696 		// insert a directory separator if this is not the file system root
4697 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4698 				>= pathLength)
4699 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4700 			return B_NAME_TOO_LONG;
4701 		}
4702 	}
4703 
4704 	return B_OK;
4705 }
4706 
4707 
4708 /*!	If the given descriptor locked its vnode, that lock will be released. */
4709 void
4710 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4711 {
4712 	struct vnode* vnode = fd_vnode(descriptor);
4713 
4714 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4715 		vnode->mandatory_locked_by = NULL;
4716 }
4717 
4718 
4719 /*!	Closes all file descriptors of the specified I/O context that
4720 	have the O_CLOEXEC flag set.
4721 */
4722 void
4723 vfs_exec_io_context(io_context* context)
4724 {
4725 	uint32 i;
4726 
4727 	for (i = 0; i < context->table_size; i++) {
4728 		mutex_lock(&context->io_mutex);
4729 
4730 		struct file_descriptor* descriptor = context->fds[i];
4731 		bool remove = false;
4732 
4733 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4734 			context->fds[i] = NULL;
4735 			context->num_used_fds--;
4736 
4737 			remove = true;
4738 		}
4739 
4740 		mutex_unlock(&context->io_mutex);
4741 
4742 		if (remove) {
4743 			close_fd(descriptor);
4744 			put_fd(descriptor);
4745 		}
4746 	}
4747 }
4748 
4749 
4750 /*! Sets up a new io_control structure, and inherits the properties
4751 	of the parent io_control if it is given.
4752 */
4753 io_context*
4754 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4755 {
4756 	io_context* context = (io_context*)malloc(sizeof(io_context));
4757 	if (context == NULL)
4758 		return NULL;
4759 
4760 	TIOC(NewIOContext(context, parentContext));
4761 
4762 	memset(context, 0, sizeof(io_context));
4763 	context->ref_count = 1;
4764 
4765 	MutexLocker parentLocker;
4766 
4767 	size_t tableSize;
4768 	if (parentContext) {
4769 		parentLocker.SetTo(parentContext->io_mutex, false);
4770 		tableSize = parentContext->table_size;
4771 	} else
4772 		tableSize = DEFAULT_FD_TABLE_SIZE;
4773 
4774 	// allocate space for FDs and their close-on-exec flag
4775 	context->fds = (file_descriptor**)malloc(
4776 		sizeof(struct file_descriptor*) * tableSize
4777 		+ sizeof(struct select_sync*) * tableSize
4778 		+ (tableSize + 7) / 8);
4779 	if (context->fds == NULL) {
4780 		free(context);
4781 		return NULL;
4782 	}
4783 
4784 	context->select_infos = (select_info**)(context->fds + tableSize);
4785 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4786 
4787 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4788 		+ sizeof(struct select_sync*) * tableSize
4789 		+ (tableSize + 7) / 8);
4790 
4791 	mutex_init(&context->io_mutex, "I/O context");
4792 
4793 	// Copy all parent file descriptors
4794 
4795 	if (parentContext) {
4796 		size_t i;
4797 
4798 		mutex_lock(&sIOContextRootLock);
4799 		context->root = parentContext->root;
4800 		if (context->root)
4801 			inc_vnode_ref_count(context->root);
4802 		mutex_unlock(&sIOContextRootLock);
4803 
4804 		context->cwd = parentContext->cwd;
4805 		if (context->cwd)
4806 			inc_vnode_ref_count(context->cwd);
4807 
4808 		for (i = 0; i < tableSize; i++) {
4809 			struct file_descriptor* descriptor = parentContext->fds[i];
4810 
4811 			if (descriptor != NULL) {
4812 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4813 				if (closeOnExec && purgeCloseOnExec)
4814 					continue;
4815 
4816 				TFD(InheritFD(context, i, descriptor, parentContext));
4817 
4818 				context->fds[i] = descriptor;
4819 				context->num_used_fds++;
4820 				atomic_add(&descriptor->ref_count, 1);
4821 				atomic_add(&descriptor->open_count, 1);
4822 
4823 				if (closeOnExec)
4824 					fd_set_close_on_exec(context, i, true);
4825 			}
4826 		}
4827 
4828 		parentLocker.Unlock();
4829 	} else {
4830 		context->root = sRoot;
4831 		context->cwd = sRoot;
4832 
4833 		if (context->root)
4834 			inc_vnode_ref_count(context->root);
4835 
4836 		if (context->cwd)
4837 			inc_vnode_ref_count(context->cwd);
4838 	}
4839 
4840 	context->table_size = tableSize;
4841 
4842 	list_init(&context->node_monitors);
4843 	context->max_monitors = DEFAULT_NODE_MONITORS;
4844 
4845 	return context;
4846 }
4847 
4848 
4849 static status_t
4850 vfs_free_io_context(io_context* context)
4851 {
4852 	uint32 i;
4853 
4854 	TIOC(FreeIOContext(context));
4855 
4856 	if (context->root)
4857 		put_vnode(context->root);
4858 
4859 	if (context->cwd)
4860 		put_vnode(context->cwd);
4861 
4862 	mutex_lock(&context->io_mutex);
4863 
4864 	for (i = 0; i < context->table_size; i++) {
4865 		if (struct file_descriptor* descriptor = context->fds[i]) {
4866 			close_fd(descriptor);
4867 			put_fd(descriptor);
4868 		}
4869 	}
4870 
4871 	mutex_destroy(&context->io_mutex);
4872 
4873 	remove_node_monitors(context);
4874 	free(context->fds);
4875 	free(context);
4876 
4877 	return B_OK;
4878 }
4879 
4880 
4881 void
4882 vfs_get_io_context(io_context* context)
4883 {
4884 	atomic_add(&context->ref_count, 1);
4885 }
4886 
4887 
4888 void
4889 vfs_put_io_context(io_context* context)
4890 {
4891 	if (atomic_add(&context->ref_count, -1) == 1)
4892 		vfs_free_io_context(context);
4893 }
4894 
4895 
4896 static status_t
4897 vfs_resize_fd_table(struct io_context* context, const int newSize)
4898 {
4899 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4900 		return B_BAD_VALUE;
4901 
4902 	TIOC(ResizeIOContext(context, newSize));
4903 
4904 	MutexLocker _(context->io_mutex);
4905 
4906 	int oldSize = context->table_size;
4907 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4908 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4909 
4910 	// If the tables shrink, make sure none of the fds being dropped are in use.
4911 	if (newSize < oldSize) {
4912 		for (int i = oldSize; i-- > newSize;) {
4913 			if (context->fds[i])
4914 				return B_BUSY;
4915 		}
4916 	}
4917 
4918 	// store pointers to the old tables
4919 	file_descriptor** oldFDs = context->fds;
4920 	select_info** oldSelectInfos = context->select_infos;
4921 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4922 
4923 	// allocate new tables
4924 	file_descriptor** newFDs = (file_descriptor**)malloc(
4925 		sizeof(struct file_descriptor*) * newSize
4926 		+ sizeof(struct select_sync*) * newSize
4927 		+ newCloseOnExitBitmapSize);
4928 	if (newFDs == NULL)
4929 		return B_NO_MEMORY;
4930 
4931 	context->fds = newFDs;
4932 	context->select_infos = (select_info**)(context->fds + newSize);
4933 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4934 	context->table_size = newSize;
4935 
4936 	// copy entries from old tables
4937 	int toCopy = min_c(oldSize, newSize);
4938 
4939 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4940 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4941 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4942 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4943 
4944 	// clear additional entries, if the tables grow
4945 	if (newSize > oldSize) {
4946 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4947 		memset(context->select_infos + oldSize, 0,
4948 			sizeof(void*) * (newSize - oldSize));
4949 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4950 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4951 	}
4952 
4953 	free(oldFDs);
4954 
4955 	return B_OK;
4956 }
4957 
4958 
4959 static status_t
4960 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4961 {
4962 	int	status = B_OK;
4963 
4964 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4965 		return B_BAD_VALUE;
4966 
4967 	mutex_lock(&context->io_mutex);
4968 
4969 	if ((size_t)newSize < context->num_monitors) {
4970 		status = B_BUSY;
4971 		goto out;
4972 	}
4973 	context->max_monitors = newSize;
4974 
4975 out:
4976 	mutex_unlock(&context->io_mutex);
4977 	return status;
4978 }
4979 
4980 
4981 status_t
4982 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
4983 	ino_t* _mountPointNodeID)
4984 {
4985 	ReadLocker nodeLocker(sVnodeLock);
4986 	MutexLocker mountLocker(sMountMutex);
4987 
4988 	struct fs_mount* mount = find_mount(mountID);
4989 	if (mount == NULL)
4990 		return B_BAD_VALUE;
4991 
4992 	Vnode* mountPoint = mount->covers_vnode;
4993 
4994 	*_mountPointMountID = mountPoint->device;
4995 	*_mountPointNodeID = mountPoint->id;
4996 
4997 	return B_OK;
4998 }
4999 
5000 
5001 status_t
5002 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5003 	ino_t coveredNodeID)
5004 {
5005 	// get the vnodes
5006 	Vnode* vnode;
5007 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5008 	if (error != B_OK)
5009 		return B_BAD_VALUE;
5010 	VNodePutter vnodePutter(vnode);
5011 
5012 	Vnode* coveredVnode;
5013 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5014 		false);
5015 	if (error != B_OK)
5016 		return B_BAD_VALUE;
5017 	VNodePutter coveredVnodePutter(coveredVnode);
5018 
5019 	// establish the covered/covering links
5020 	WriteLocker locker(sVnodeLock);
5021 
5022 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5023 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5024 		return B_BUSY;
5025 	}
5026 
5027 	vnode->covers = coveredVnode;
5028 	vnode->SetCovering(true);
5029 
5030 	coveredVnode->covered_by = vnode;
5031 	coveredVnode->SetCovered(true);
5032 
5033 	// the vnodes do now reference each other
5034 	inc_vnode_ref_count(vnode);
5035 	inc_vnode_ref_count(coveredVnode);
5036 
5037 	return B_OK;
5038 }
5039 
5040 
5041 int
5042 vfs_getrlimit(int resource, struct rlimit* rlp)
5043 {
5044 	if (!rlp)
5045 		return B_BAD_ADDRESS;
5046 
5047 	switch (resource) {
5048 		case RLIMIT_NOFILE:
5049 		{
5050 			struct io_context* context = get_current_io_context(false);
5051 			MutexLocker _(context->io_mutex);
5052 
5053 			rlp->rlim_cur = context->table_size;
5054 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5055 			return 0;
5056 		}
5057 
5058 		case RLIMIT_NOVMON:
5059 		{
5060 			struct io_context* context = get_current_io_context(false);
5061 			MutexLocker _(context->io_mutex);
5062 
5063 			rlp->rlim_cur = context->max_monitors;
5064 			rlp->rlim_max = MAX_NODE_MONITORS;
5065 			return 0;
5066 		}
5067 
5068 		default:
5069 			return B_BAD_VALUE;
5070 	}
5071 }
5072 
5073 
5074 int
5075 vfs_setrlimit(int resource, const struct rlimit* rlp)
5076 {
5077 	if (!rlp)
5078 		return B_BAD_ADDRESS;
5079 
5080 	switch (resource) {
5081 		case RLIMIT_NOFILE:
5082 			/* TODO: check getuid() */
5083 			if (rlp->rlim_max != RLIM_SAVED_MAX
5084 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5085 				return B_NOT_ALLOWED;
5086 
5087 			return vfs_resize_fd_table(get_current_io_context(false),
5088 				rlp->rlim_cur);
5089 
5090 		case RLIMIT_NOVMON:
5091 			/* TODO: check getuid() */
5092 			if (rlp->rlim_max != RLIM_SAVED_MAX
5093 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5094 				return B_NOT_ALLOWED;
5095 
5096 			return vfs_resize_monitor_table(get_current_io_context(false),
5097 				rlp->rlim_cur);
5098 
5099 		default:
5100 			return B_BAD_VALUE;
5101 	}
5102 }
5103 
5104 
5105 status_t
5106 vfs_init(kernel_args* args)
5107 {
5108 	vnode::StaticInit();
5109 
5110 	struct vnode dummyVnode;
5111 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5112 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5113 	if (sVnodeTable == NULL)
5114 		panic("vfs_init: error creating vnode hash table\n");
5115 
5116 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5117 
5118 	struct fs_mount dummyMount;
5119 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5120 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5121 	if (sMountsTable == NULL)
5122 		panic("vfs_init: error creating mounts hash table\n");
5123 
5124 	node_monitor_init();
5125 
5126 	sRoot = NULL;
5127 
5128 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5129 
5130 	if (block_cache_init() != B_OK)
5131 		return B_ERROR;
5132 
5133 #ifdef ADD_DEBUGGER_COMMANDS
5134 	// add some debugger commands
5135 	add_debugger_command_etc("vnode", &dump_vnode,
5136 		"Print info about the specified vnode",
5137 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5138 		"Prints information about the vnode specified by address <vnode> or\n"
5139 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5140 		"constructed and printed. It might not be possible to construct a\n"
5141 		"complete path, though.\n",
5142 		0);
5143 	add_debugger_command("vnodes", &dump_vnodes,
5144 		"list all vnodes (from the specified device)");
5145 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5146 		"list all vnode caches");
5147 	add_debugger_command("mount", &dump_mount,
5148 		"info about the specified fs_mount");
5149 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5150 	add_debugger_command("io_context", &dump_io_context,
5151 		"info about the I/O context");
5152 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5153 		"info about vnode usage");
5154 #endif
5155 
5156 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5157 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5158 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5159 		0);
5160 
5161 	file_map_init();
5162 
5163 	return file_cache_init();
5164 }
5165 
5166 
5167 //	#pragma mark - fd_ops implementations
5168 
5169 
5170 /*!
5171 	Calls fs_open() on the given vnode and returns a new
5172 	file descriptor for it
5173 */
5174 static int
5175 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5176 {
5177 	void* cookie;
5178 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5179 	if (status != B_OK)
5180 		return status;
5181 
5182 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5183 	if (fd < 0) {
5184 		FS_CALL(vnode, close, cookie);
5185 		FS_CALL(vnode, free_cookie, cookie);
5186 	}
5187 	return fd;
5188 }
5189 
5190 
5191 /*!
5192 	Calls fs_open() on the given vnode and returns a new
5193 	file descriptor for it
5194 */
5195 static int
5196 create_vnode(struct vnode* directory, const char* name, int openMode,
5197 	int perms, bool kernel)
5198 {
5199 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5200 	status_t status = B_ERROR;
5201 	struct vnode* vnode;
5202 	void* cookie;
5203 	ino_t newID;
5204 
5205 	// This is somewhat tricky: If the entry already exists, the FS responsible
5206 	// for the directory might not necessarily also be the one responsible for
5207 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5208 	// we can actually never call the create() hook without O_EXCL. Instead we
5209 	// try to look the entry up first. If it already exists, we just open the
5210 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5211 	// introduces a race condition, since someone else might have created the
5212 	// entry in the meantime. We hope the respective FS returns the correct
5213 	// error code and retry (up to 3 times) again.
5214 
5215 	for (int i = 0; i < 3 && status != B_OK; i++) {
5216 		// look the node up
5217 		status = lookup_dir_entry(directory, name, &vnode);
5218 		if (status == B_OK) {
5219 			VNodePutter putter(vnode);
5220 
5221 			if ((openMode & O_EXCL) != 0)
5222 				return B_FILE_EXISTS;
5223 
5224 			// If the node is a symlink, we have to follow it, unless
5225 			// O_NOTRAVERSE is set.
5226 			if (S_ISLNK(vnode->Type()) && traverse) {
5227 				putter.Put();
5228 				char clonedName[B_FILE_NAME_LENGTH + 1];
5229 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5230 						>= B_FILE_NAME_LENGTH) {
5231 					return B_NAME_TOO_LONG;
5232 				}
5233 
5234 				inc_vnode_ref_count(directory);
5235 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5236 					kernel, &vnode, NULL);
5237 				if (status != B_OK)
5238 					return status;
5239 
5240 				putter.SetTo(vnode);
5241 			}
5242 
5243 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5244 				put_vnode(vnode);
5245 				return B_LINK_LIMIT;
5246 			}
5247 
5248 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5249 			// on success keep the vnode reference for the FD
5250 			if (fd >= 0)
5251 				putter.Detach();
5252 
5253 			return fd;
5254 		}
5255 
5256 		// it doesn't exist yet -- try to create it
5257 
5258 		if (!HAS_FS_CALL(directory, create))
5259 			return B_READ_ONLY_DEVICE;
5260 
5261 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5262 			&cookie, &newID);
5263 		if (status != B_OK
5264 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5265 			return status;
5266 		}
5267 	}
5268 
5269 	if (status != B_OK)
5270 		return status;
5271 
5272 	// the node has been created successfully
5273 
5274 	rw_lock_read_lock(&sVnodeLock);
5275 	vnode = lookup_vnode(directory->device, newID);
5276 	rw_lock_read_unlock(&sVnodeLock);
5277 
5278 	if (vnode == NULL) {
5279 		panic("vfs: fs_create() returned success but there is no vnode, "
5280 			"mount ID %ld!\n", directory->device);
5281 		return B_BAD_VALUE;
5282 	}
5283 
5284 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5285 	if (fd >= 0)
5286 		return fd;
5287 
5288 	status = fd;
5289 
5290 	// something went wrong, clean up
5291 
5292 	FS_CALL(vnode, close, cookie);
5293 	FS_CALL(vnode, free_cookie, cookie);
5294 	put_vnode(vnode);
5295 
5296 	FS_CALL(directory, unlink, name);
5297 
5298 	return status;
5299 }
5300 
5301 
5302 /*! Calls fs open_dir() on the given vnode and returns a new
5303 	file descriptor for it
5304 */
5305 static int
5306 open_dir_vnode(struct vnode* vnode, bool kernel)
5307 {
5308 	void* cookie;
5309 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5310 	if (status != B_OK)
5311 		return status;
5312 
5313 	// directory is opened, create a fd
5314 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5315 	if (status >= 0)
5316 		return status;
5317 
5318 	FS_CALL(vnode, close_dir, cookie);
5319 	FS_CALL(vnode, free_dir_cookie, cookie);
5320 
5321 	return status;
5322 }
5323 
5324 
5325 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5326 	file descriptor for it.
5327 	Used by attr_dir_open(), and attr_dir_open_fd().
5328 */
5329 static int
5330 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5331 {
5332 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5333 		return B_UNSUPPORTED;
5334 
5335 	void* cookie;
5336 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5337 	if (status != B_OK)
5338 		return status;
5339 
5340 	// directory is opened, create a fd
5341 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5342 		kernel);
5343 	if (status >= 0)
5344 		return status;
5345 
5346 	FS_CALL(vnode, close_attr_dir, cookie);
5347 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5348 
5349 	return status;
5350 }
5351 
5352 
5353 static int
5354 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5355 	int openMode, int perms, bool kernel)
5356 {
5357 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5358 		"kernel %d\n", name, openMode, perms, kernel));
5359 
5360 	// get directory to put the new file in
5361 	struct vnode* directory;
5362 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5363 	if (status != B_OK)
5364 		return status;
5365 
5366 	status = create_vnode(directory, name, openMode, perms, kernel);
5367 	put_vnode(directory);
5368 
5369 	return status;
5370 }
5371 
5372 
5373 static int
5374 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5375 {
5376 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5377 		openMode, perms, kernel));
5378 
5379 	// get directory to put the new file in
5380 	char name[B_FILE_NAME_LENGTH];
5381 	struct vnode* directory;
5382 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5383 		kernel);
5384 	if (status < 0)
5385 		return status;
5386 
5387 	status = create_vnode(directory, name, openMode, perms, kernel);
5388 
5389 	put_vnode(directory);
5390 	return status;
5391 }
5392 
5393 
5394 static int
5395 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5396 	int openMode, bool kernel)
5397 {
5398 	if (name == NULL || *name == '\0')
5399 		return B_BAD_VALUE;
5400 
5401 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5402 		mountID, directoryID, name, openMode));
5403 
5404 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5405 
5406 	// get the vnode matching the entry_ref
5407 	struct vnode* vnode;
5408 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5409 		kernel, &vnode);
5410 	if (status != B_OK)
5411 		return status;
5412 
5413 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5414 		put_vnode(vnode);
5415 		return B_LINK_LIMIT;
5416 	}
5417 
5418 	int newFD = open_vnode(vnode, openMode, kernel);
5419 	if (newFD >= 0) {
5420 		// The vnode reference has been transferred to the FD
5421 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5422 			directoryID, vnode->id, name);
5423 	} else
5424 		put_vnode(vnode);
5425 
5426 	return newFD;
5427 }
5428 
5429 
5430 static int
5431 file_open(int fd, char* path, int openMode, bool kernel)
5432 {
5433 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5434 
5435 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5436 		fd, path, openMode, kernel));
5437 
5438 	// get the vnode matching the vnode + path combination
5439 	struct vnode* vnode;
5440 	ino_t parentID;
5441 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5442 		&parentID, kernel);
5443 	if (status != B_OK)
5444 		return status;
5445 
5446 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5447 		put_vnode(vnode);
5448 		return B_LINK_LIMIT;
5449 	}
5450 
5451 	// open the vnode
5452 	int newFD = open_vnode(vnode, openMode, kernel);
5453 	if (newFD >= 0) {
5454 		// The vnode reference has been transferred to the FD
5455 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5456 			vnode->device, parentID, vnode->id, NULL);
5457 	} else
5458 		put_vnode(vnode);
5459 
5460 	return newFD;
5461 }
5462 
5463 
5464 static status_t
5465 file_close(struct file_descriptor* descriptor)
5466 {
5467 	struct vnode* vnode = descriptor->u.vnode;
5468 	status_t status = B_OK;
5469 
5470 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5471 
5472 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5473 		vnode->id);
5474 	if (HAS_FS_CALL(vnode, close)) {
5475 		status = FS_CALL(vnode, close, descriptor->cookie);
5476 	}
5477 
5478 	if (status == B_OK) {
5479 		// remove all outstanding locks for this team
5480 		release_advisory_lock(vnode, NULL);
5481 	}
5482 	return status;
5483 }
5484 
5485 
5486 static void
5487 file_free_fd(struct file_descriptor* descriptor)
5488 {
5489 	struct vnode* vnode = descriptor->u.vnode;
5490 
5491 	if (vnode != NULL) {
5492 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5493 		put_vnode(vnode);
5494 	}
5495 }
5496 
5497 
5498 static status_t
5499 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5500 	size_t* length)
5501 {
5502 	struct vnode* vnode = descriptor->u.vnode;
5503 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5504 		*length));
5505 
5506 	if (S_ISDIR(vnode->Type()))
5507 		return B_IS_A_DIRECTORY;
5508 
5509 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5510 }
5511 
5512 
5513 static status_t
5514 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5515 	size_t* length)
5516 {
5517 	struct vnode* vnode = descriptor->u.vnode;
5518 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5519 
5520 	if (S_ISDIR(vnode->Type()))
5521 		return B_IS_A_DIRECTORY;
5522 	if (!HAS_FS_CALL(vnode, write))
5523 		return B_READ_ONLY_DEVICE;
5524 
5525 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5526 }
5527 
5528 
5529 static off_t
5530 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5531 {
5532 	struct vnode* vnode = descriptor->u.vnode;
5533 	off_t offset;
5534 
5535 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5536 
5537 	// some kinds of files are not seekable
5538 	switch (vnode->Type() & S_IFMT) {
5539 		case S_IFIFO:
5540 		case S_IFSOCK:
5541 			return ESPIPE;
5542 
5543 		// The Open Group Base Specs don't mention any file types besides pipes,
5544 		// fifos, and sockets specially, so we allow seeking them.
5545 		case S_IFREG:
5546 		case S_IFBLK:
5547 		case S_IFDIR:
5548 		case S_IFLNK:
5549 		case S_IFCHR:
5550 			break;
5551 	}
5552 
5553 	switch (seekType) {
5554 		case SEEK_SET:
5555 			offset = 0;
5556 			break;
5557 		case SEEK_CUR:
5558 			offset = descriptor->pos;
5559 			break;
5560 		case SEEK_END:
5561 		{
5562 			// stat() the node
5563 			if (!HAS_FS_CALL(vnode, read_stat))
5564 				return B_UNSUPPORTED;
5565 
5566 			struct stat stat;
5567 			status_t status = FS_CALL(vnode, read_stat, &stat);
5568 			if (status != B_OK)
5569 				return status;
5570 
5571 			offset = stat.st_size;
5572 			break;
5573 		}
5574 		default:
5575 			return B_BAD_VALUE;
5576 	}
5577 
5578 	// assumes off_t is 64 bits wide
5579 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5580 		return B_BUFFER_OVERFLOW;
5581 
5582 	pos += offset;
5583 	if (pos < 0)
5584 		return B_BAD_VALUE;
5585 
5586 	return descriptor->pos = pos;
5587 }
5588 
5589 
5590 static status_t
5591 file_select(struct file_descriptor* descriptor, uint8 event,
5592 	struct selectsync* sync)
5593 {
5594 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5595 
5596 	struct vnode* vnode = descriptor->u.vnode;
5597 
5598 	// If the FS has no select() hook, notify select() now.
5599 	if (!HAS_FS_CALL(vnode, select))
5600 		return notify_select_event(sync, event);
5601 
5602 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5603 }
5604 
5605 
5606 static status_t
5607 file_deselect(struct file_descriptor* descriptor, uint8 event,
5608 	struct selectsync* sync)
5609 {
5610 	struct vnode* vnode = descriptor->u.vnode;
5611 
5612 	if (!HAS_FS_CALL(vnode, deselect))
5613 		return B_OK;
5614 
5615 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5616 }
5617 
5618 
5619 static status_t
5620 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5621 	bool kernel)
5622 {
5623 	struct vnode* vnode;
5624 	status_t status;
5625 
5626 	if (name == NULL || *name == '\0')
5627 		return B_BAD_VALUE;
5628 
5629 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5630 		"perms = %d)\n", mountID, parentID, name, perms));
5631 
5632 	status = get_vnode(mountID, parentID, &vnode, true, false);
5633 	if (status != B_OK)
5634 		return status;
5635 
5636 	if (HAS_FS_CALL(vnode, create_dir))
5637 		status = FS_CALL(vnode, create_dir, name, perms);
5638 	else
5639 		status = B_READ_ONLY_DEVICE;
5640 
5641 	put_vnode(vnode);
5642 	return status;
5643 }
5644 
5645 
5646 static status_t
5647 dir_create(int fd, char* path, int perms, bool kernel)
5648 {
5649 	char filename[B_FILE_NAME_LENGTH];
5650 	struct vnode* vnode;
5651 	status_t status;
5652 
5653 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5654 		kernel));
5655 
5656 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5657 	if (status < 0)
5658 		return status;
5659 
5660 	if (HAS_FS_CALL(vnode, create_dir)) {
5661 		status = FS_CALL(vnode, create_dir, filename, perms);
5662 	} else
5663 		status = B_READ_ONLY_DEVICE;
5664 
5665 	put_vnode(vnode);
5666 	return status;
5667 }
5668 
5669 
5670 static int
5671 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5672 {
5673 	FUNCTION(("dir_open_entry_ref()\n"));
5674 
5675 	if (name && name[0] == '\0')
5676 		return B_BAD_VALUE;
5677 
5678 	// get the vnode matching the entry_ref/node_ref
5679 	struct vnode* vnode;
5680 	status_t status;
5681 	if (name) {
5682 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5683 			&vnode);
5684 	} else
5685 		status = get_vnode(mountID, parentID, &vnode, true, false);
5686 	if (status != B_OK)
5687 		return status;
5688 
5689 	int newFD = open_dir_vnode(vnode, kernel);
5690 	if (newFD >= 0) {
5691 		// The vnode reference has been transferred to the FD
5692 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5693 			vnode->id, name);
5694 	} else
5695 		put_vnode(vnode);
5696 
5697 	return newFD;
5698 }
5699 
5700 
5701 static int
5702 dir_open(int fd, char* path, bool kernel)
5703 {
5704 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5705 		kernel));
5706 
5707 	// get the vnode matching the vnode + path combination
5708 	struct vnode* vnode = NULL;
5709 	ino_t parentID;
5710 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5711 		kernel);
5712 	if (status != B_OK)
5713 		return status;
5714 
5715 	// open the dir
5716 	int newFD = open_dir_vnode(vnode, kernel);
5717 	if (newFD >= 0) {
5718 		// The vnode reference has been transferred to the FD
5719 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5720 			parentID, vnode->id, NULL);
5721 	} else
5722 		put_vnode(vnode);
5723 
5724 	return newFD;
5725 }
5726 
5727 
5728 static status_t
5729 dir_close(struct file_descriptor* descriptor)
5730 {
5731 	struct vnode* vnode = descriptor->u.vnode;
5732 
5733 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5734 
5735 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5736 		vnode->id);
5737 	if (HAS_FS_CALL(vnode, close_dir))
5738 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5739 
5740 	return B_OK;
5741 }
5742 
5743 
5744 static void
5745 dir_free_fd(struct file_descriptor* descriptor)
5746 {
5747 	struct vnode* vnode = descriptor->u.vnode;
5748 
5749 	if (vnode != NULL) {
5750 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5751 		put_vnode(vnode);
5752 	}
5753 }
5754 
5755 
5756 static status_t
5757 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5758 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5759 {
5760 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5761 		bufferSize, _count);
5762 }
5763 
5764 
5765 static status_t
5766 fix_dirent(struct vnode* parent, struct dirent* entry,
5767 	struct io_context* ioContext)
5768 {
5769 	// set d_pdev and d_pino
5770 	entry->d_pdev = parent->device;
5771 	entry->d_pino = parent->id;
5772 
5773 	// If this is the ".." entry and the directory covering another vnode,
5774 	// we need to replace d_dev and d_ino with the actual values.
5775 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5776 		// Make sure the IO context root is not bypassed.
5777 		if (parent == ioContext->root) {
5778 			entry->d_dev = parent->device;
5779 			entry->d_ino = parent->id;
5780 		} else {
5781 			inc_vnode_ref_count(parent);
5782 				// vnode_path_to_vnode() puts the node
5783 
5784 			// ".." is guaranteed not to be clobbered by this call
5785 			struct vnode* vnode;
5786 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5787 				ioContext, &vnode, NULL);
5788 
5789 			if (status == B_OK) {
5790 				entry->d_dev = vnode->device;
5791 				entry->d_ino = vnode->id;
5792 				put_vnode(vnode);
5793 			}
5794 		}
5795 	} else {
5796 		// resolve covered vnodes
5797 		ReadLocker _(&sVnodeLock);
5798 
5799 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5800 		if (vnode != NULL && vnode->covered_by != NULL) {
5801 			do {
5802 				vnode = vnode->covered_by;
5803 			} while (vnode->covered_by != NULL);
5804 
5805 			entry->d_dev = vnode->device;
5806 			entry->d_ino = vnode->id;
5807 		}
5808 	}
5809 
5810 	return B_OK;
5811 }
5812 
5813 
5814 static status_t
5815 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5816 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5817 {
5818 	if (!HAS_FS_CALL(vnode, read_dir))
5819 		return B_UNSUPPORTED;
5820 
5821 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5822 		_count);
5823 	if (error != B_OK)
5824 		return error;
5825 
5826 	// we need to adjust the read dirents
5827 	uint32 count = *_count;
5828 	for (uint32 i = 0; i < count; i++) {
5829 		error = fix_dirent(vnode, buffer, ioContext);
5830 		if (error != B_OK)
5831 			return error;
5832 
5833 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5834 	}
5835 
5836 	return error;
5837 }
5838 
5839 
5840 static status_t
5841 dir_rewind(struct file_descriptor* descriptor)
5842 {
5843 	struct vnode* vnode = descriptor->u.vnode;
5844 
5845 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5846 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5847 	}
5848 
5849 	return B_UNSUPPORTED;
5850 }
5851 
5852 
5853 static status_t
5854 dir_remove(int fd, char* path, bool kernel)
5855 {
5856 	char name[B_FILE_NAME_LENGTH];
5857 	struct vnode* directory;
5858 	status_t status;
5859 
5860 	if (path != NULL) {
5861 		// we need to make sure our path name doesn't stop with "/", ".",
5862 		// or ".."
5863 		char* lastSlash;
5864 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5865 			char* leaf = lastSlash + 1;
5866 			if (!strcmp(leaf, ".."))
5867 				return B_NOT_ALLOWED;
5868 
5869 			// omit multiple slashes
5870 			while (lastSlash > path && lastSlash[-1] == '/')
5871 				lastSlash--;
5872 
5873 			if (leaf[0]
5874 				&& strcmp(leaf, ".")) {
5875 				break;
5876 			}
5877 			// "name/" -> "name", or "name/." -> "name"
5878 			lastSlash[0] = '\0';
5879 		}
5880 
5881 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5882 			return B_NOT_ALLOWED;
5883 	}
5884 
5885 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5886 	if (status != B_OK)
5887 		return status;
5888 
5889 	if (HAS_FS_CALL(directory, remove_dir))
5890 		status = FS_CALL(directory, remove_dir, name);
5891 	else
5892 		status = B_READ_ONLY_DEVICE;
5893 
5894 	put_vnode(directory);
5895 	return status;
5896 }
5897 
5898 
5899 static status_t
5900 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5901 	size_t length)
5902 {
5903 	struct vnode* vnode = descriptor->u.vnode;
5904 
5905 	if (HAS_FS_CALL(vnode, ioctl))
5906 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5907 
5908 	return B_DEV_INVALID_IOCTL;
5909 }
5910 
5911 
5912 static status_t
5913 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5914 {
5915 	struct flock flock;
5916 
5917 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5918 		fd, op, argument, kernel ? "kernel" : "user"));
5919 
5920 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5921 		fd);
5922 	if (descriptor == NULL)
5923 		return B_FILE_ERROR;
5924 
5925 	struct vnode* vnode = fd_vnode(descriptor);
5926 
5927 	status_t status = B_OK;
5928 
5929 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5930 		if (descriptor->type != FDTYPE_FILE)
5931 			status = B_BAD_VALUE;
5932 		else if (user_memcpy(&flock, (struct flock*)argument,
5933 				sizeof(struct flock)) != B_OK)
5934 			status = B_BAD_ADDRESS;
5935 
5936 		if (status != B_OK) {
5937 			put_fd(descriptor);
5938 			return status;
5939 		}
5940 	}
5941 
5942 	switch (op) {
5943 		case F_SETFD:
5944 		{
5945 			struct io_context* context = get_current_io_context(kernel);
5946 			// Set file descriptor flags
5947 
5948 			// O_CLOEXEC is the only flag available at this time
5949 			mutex_lock(&context->io_mutex);
5950 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5951 			mutex_unlock(&context->io_mutex);
5952 
5953 			status = B_OK;
5954 			break;
5955 		}
5956 
5957 		case F_GETFD:
5958 		{
5959 			struct io_context* context = get_current_io_context(kernel);
5960 
5961 			// Get file descriptor flags
5962 			mutex_lock(&context->io_mutex);
5963 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5964 			mutex_unlock(&context->io_mutex);
5965 			break;
5966 		}
5967 
5968 		case F_SETFL:
5969 			// Set file descriptor open mode
5970 
5971 			// we only accept changes to O_APPEND and O_NONBLOCK
5972 			argument &= O_APPEND | O_NONBLOCK;
5973 			if (descriptor->ops->fd_set_flags != NULL) {
5974 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5975 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5976 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5977 					(int)argument);
5978 			} else
5979 				status = B_UNSUPPORTED;
5980 
5981 			if (status == B_OK) {
5982 				// update this descriptor's open_mode field
5983 				descriptor->open_mode = (descriptor->open_mode
5984 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5985 			}
5986 
5987 			break;
5988 
5989 		case F_GETFL:
5990 			// Get file descriptor open mode
5991 			status = descriptor->open_mode;
5992 			break;
5993 
5994 		case F_DUPFD:
5995 		{
5996 			struct io_context* context = get_current_io_context(kernel);
5997 
5998 			status = new_fd_etc(context, descriptor, (int)argument);
5999 			if (status >= 0) {
6000 				mutex_lock(&context->io_mutex);
6001 				fd_set_close_on_exec(context, fd, false);
6002 				mutex_unlock(&context->io_mutex);
6003 
6004 				atomic_add(&descriptor->ref_count, 1);
6005 			}
6006 			break;
6007 		}
6008 
6009 		case F_GETLK:
6010 			if (vnode != NULL) {
6011 				status = get_advisory_lock(vnode, &flock);
6012 				if (status == B_OK) {
6013 					// copy back flock structure
6014 					status = user_memcpy((struct flock*)argument, &flock,
6015 						sizeof(struct flock));
6016 				}
6017 			} else
6018 				status = B_BAD_VALUE;
6019 			break;
6020 
6021 		case F_SETLK:
6022 		case F_SETLKW:
6023 			status = normalize_flock(descriptor, &flock);
6024 			if (status != B_OK)
6025 				break;
6026 
6027 			if (vnode == NULL) {
6028 				status = B_BAD_VALUE;
6029 			} else if (flock.l_type == F_UNLCK) {
6030 				status = release_advisory_lock(vnode, &flock);
6031 			} else {
6032 				// the open mode must match the lock type
6033 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6034 						&& flock.l_type == F_WRLCK)
6035 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6036 						&& flock.l_type == F_RDLCK))
6037 					status = B_FILE_ERROR;
6038 				else {
6039 					status = acquire_advisory_lock(vnode, -1,
6040 						&flock, op == F_SETLKW);
6041 				}
6042 			}
6043 			break;
6044 
6045 		// ToDo: add support for more ops?
6046 
6047 		default:
6048 			status = B_BAD_VALUE;
6049 	}
6050 
6051 	put_fd(descriptor);
6052 	return status;
6053 }
6054 
6055 
6056 static status_t
6057 common_sync(int fd, bool kernel)
6058 {
6059 	struct file_descriptor* descriptor;
6060 	struct vnode* vnode;
6061 	status_t status;
6062 
6063 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6064 
6065 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6066 	if (descriptor == NULL)
6067 		return B_FILE_ERROR;
6068 
6069 	if (HAS_FS_CALL(vnode, fsync))
6070 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6071 	else
6072 		status = B_UNSUPPORTED;
6073 
6074 	put_fd(descriptor);
6075 	return status;
6076 }
6077 
6078 
6079 static status_t
6080 common_lock_node(int fd, bool kernel)
6081 {
6082 	struct file_descriptor* descriptor;
6083 	struct vnode* vnode;
6084 
6085 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6086 	if (descriptor == NULL)
6087 		return B_FILE_ERROR;
6088 
6089 	status_t status = B_OK;
6090 
6091 	// We need to set the locking atomically - someone
6092 	// else might set one at the same time
6093 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6094 			(file_descriptor*)NULL) != NULL)
6095 		status = B_BUSY;
6096 
6097 	put_fd(descriptor);
6098 	return status;
6099 }
6100 
6101 
6102 static status_t
6103 common_unlock_node(int fd, bool kernel)
6104 {
6105 	struct file_descriptor* descriptor;
6106 	struct vnode* vnode;
6107 
6108 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6109 	if (descriptor == NULL)
6110 		return B_FILE_ERROR;
6111 
6112 	status_t status = B_OK;
6113 
6114 	// We need to set the locking atomically - someone
6115 	// else might set one at the same time
6116 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6117 			(file_descriptor*)NULL, descriptor) != descriptor)
6118 		status = B_BAD_VALUE;
6119 
6120 	put_fd(descriptor);
6121 	return status;
6122 }
6123 
6124 
6125 static status_t
6126 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6127 	bool kernel)
6128 {
6129 	struct vnode* vnode;
6130 	status_t status;
6131 
6132 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6133 	if (status != B_OK)
6134 		return status;
6135 
6136 	if (HAS_FS_CALL(vnode, read_symlink)) {
6137 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6138 	} else
6139 		status = B_BAD_VALUE;
6140 
6141 	put_vnode(vnode);
6142 	return status;
6143 }
6144 
6145 
6146 static status_t
6147 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6148 	bool kernel)
6149 {
6150 	// path validity checks have to be in the calling function!
6151 	char name[B_FILE_NAME_LENGTH];
6152 	struct vnode* vnode;
6153 	status_t status;
6154 
6155 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6156 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6157 
6158 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6159 	if (status != B_OK)
6160 		return status;
6161 
6162 	if (HAS_FS_CALL(vnode, create_symlink))
6163 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6164 	else {
6165 		status = HAS_FS_CALL(vnode, write)
6166 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6167 	}
6168 
6169 	put_vnode(vnode);
6170 
6171 	return status;
6172 }
6173 
6174 
6175 static status_t
6176 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6177 	bool traverseLeafLink, bool kernel)
6178 {
6179 	// path validity checks have to be in the calling function!
6180 
6181 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6182 		toPath, kernel));
6183 
6184 	char name[B_FILE_NAME_LENGTH];
6185 	struct vnode* directory;
6186 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6187 		kernel);
6188 	if (status != B_OK)
6189 		return status;
6190 
6191 	struct vnode* vnode;
6192 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6193 		kernel);
6194 	if (status != B_OK)
6195 		goto err;
6196 
6197 	if (directory->mount != vnode->mount) {
6198 		status = B_CROSS_DEVICE_LINK;
6199 		goto err1;
6200 	}
6201 
6202 	if (HAS_FS_CALL(directory, link))
6203 		status = FS_CALL(directory, link, name, vnode);
6204 	else
6205 		status = B_READ_ONLY_DEVICE;
6206 
6207 err1:
6208 	put_vnode(vnode);
6209 err:
6210 	put_vnode(directory);
6211 
6212 	return status;
6213 }
6214 
6215 
6216 static status_t
6217 common_unlink(int fd, char* path, bool kernel)
6218 {
6219 	char filename[B_FILE_NAME_LENGTH];
6220 	struct vnode* vnode;
6221 	status_t status;
6222 
6223 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6224 		kernel));
6225 
6226 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6227 	if (status < 0)
6228 		return status;
6229 
6230 	if (HAS_FS_CALL(vnode, unlink))
6231 		status = FS_CALL(vnode, unlink, filename);
6232 	else
6233 		status = B_READ_ONLY_DEVICE;
6234 
6235 	put_vnode(vnode);
6236 
6237 	return status;
6238 }
6239 
6240 
6241 static status_t
6242 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6243 {
6244 	struct vnode* vnode;
6245 	status_t status;
6246 
6247 	// TODO: honor effectiveUserGroup argument
6248 
6249 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6250 	if (status != B_OK)
6251 		return status;
6252 
6253 	if (HAS_FS_CALL(vnode, access))
6254 		status = FS_CALL(vnode, access, mode);
6255 	else
6256 		status = B_OK;
6257 
6258 	put_vnode(vnode);
6259 
6260 	return status;
6261 }
6262 
6263 
6264 static status_t
6265 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6266 {
6267 	struct vnode* fromVnode;
6268 	struct vnode* toVnode;
6269 	char fromName[B_FILE_NAME_LENGTH];
6270 	char toName[B_FILE_NAME_LENGTH];
6271 	status_t status;
6272 
6273 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6274 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6275 
6276 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6277 	if (status != B_OK)
6278 		return status;
6279 
6280 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6281 	if (status != B_OK)
6282 		goto err1;
6283 
6284 	if (fromVnode->device != toVnode->device) {
6285 		status = B_CROSS_DEVICE_LINK;
6286 		goto err2;
6287 	}
6288 
6289 	if (fromName[0] == '\0' || toName[0] == '\0'
6290 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6291 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6292 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6293 		status = B_BAD_VALUE;
6294 		goto err2;
6295 	}
6296 
6297 	if (HAS_FS_CALL(fromVnode, rename))
6298 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6299 	else
6300 		status = B_READ_ONLY_DEVICE;
6301 
6302 err2:
6303 	put_vnode(toVnode);
6304 err1:
6305 	put_vnode(fromVnode);
6306 
6307 	return status;
6308 }
6309 
6310 
6311 static status_t
6312 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6313 {
6314 	struct vnode* vnode = descriptor->u.vnode;
6315 
6316 	FUNCTION(("common_read_stat: stat %p\n", stat));
6317 
6318 	// TODO: remove this once all file systems properly set them!
6319 	stat->st_crtim.tv_nsec = 0;
6320 	stat->st_ctim.tv_nsec = 0;
6321 	stat->st_mtim.tv_nsec = 0;
6322 	stat->st_atim.tv_nsec = 0;
6323 
6324 	status_t status = FS_CALL(vnode, read_stat, stat);
6325 
6326 	// fill in the st_dev and st_ino fields
6327 	if (status == B_OK) {
6328 		stat->st_dev = vnode->device;
6329 		stat->st_ino = vnode->id;
6330 		stat->st_rdev = -1;
6331 	}
6332 
6333 	return status;
6334 }
6335 
6336 
6337 static status_t
6338 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6339 	int statMask)
6340 {
6341 	struct vnode* vnode = descriptor->u.vnode;
6342 
6343 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6344 		vnode, stat, statMask));
6345 
6346 	if (!HAS_FS_CALL(vnode, write_stat))
6347 		return B_READ_ONLY_DEVICE;
6348 
6349 	return FS_CALL(vnode, write_stat, stat, statMask);
6350 }
6351 
6352 
6353 static status_t
6354 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6355 	struct stat* stat, bool kernel)
6356 {
6357 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6358 		stat));
6359 
6360 	struct vnode* vnode;
6361 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6362 		NULL, kernel);
6363 	if (status != B_OK)
6364 		return status;
6365 
6366 	status = FS_CALL(vnode, read_stat, stat);
6367 
6368 	// fill in the st_dev and st_ino fields
6369 	if (status == B_OK) {
6370 		stat->st_dev = vnode->device;
6371 		stat->st_ino = vnode->id;
6372 		stat->st_rdev = -1;
6373 	}
6374 
6375 	put_vnode(vnode);
6376 	return status;
6377 }
6378 
6379 
6380 static status_t
6381 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6382 	const struct stat* stat, int statMask, bool kernel)
6383 {
6384 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6385 		"kernel %d\n", fd, path, stat, statMask, kernel));
6386 
6387 	struct vnode* vnode;
6388 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6389 		NULL, kernel);
6390 	if (status != B_OK)
6391 		return status;
6392 
6393 	if (HAS_FS_CALL(vnode, write_stat))
6394 		status = FS_CALL(vnode, write_stat, stat, statMask);
6395 	else
6396 		status = B_READ_ONLY_DEVICE;
6397 
6398 	put_vnode(vnode);
6399 
6400 	return status;
6401 }
6402 
6403 
6404 static int
6405 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6406 {
6407 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6408 		kernel));
6409 
6410 	struct vnode* vnode;
6411 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6412 		NULL, kernel);
6413 	if (status != B_OK)
6414 		return status;
6415 
6416 	status = open_attr_dir_vnode(vnode, kernel);
6417 	if (status < 0)
6418 		put_vnode(vnode);
6419 
6420 	return status;
6421 }
6422 
6423 
6424 static status_t
6425 attr_dir_close(struct file_descriptor* descriptor)
6426 {
6427 	struct vnode* vnode = descriptor->u.vnode;
6428 
6429 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6430 
6431 	if (HAS_FS_CALL(vnode, close_attr_dir))
6432 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6433 
6434 	return B_OK;
6435 }
6436 
6437 
6438 static void
6439 attr_dir_free_fd(struct file_descriptor* descriptor)
6440 {
6441 	struct vnode* vnode = descriptor->u.vnode;
6442 
6443 	if (vnode != NULL) {
6444 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6445 		put_vnode(vnode);
6446 	}
6447 }
6448 
6449 
6450 static status_t
6451 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6452 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6453 {
6454 	struct vnode* vnode = descriptor->u.vnode;
6455 
6456 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6457 
6458 	if (HAS_FS_CALL(vnode, read_attr_dir))
6459 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6460 			bufferSize, _count);
6461 
6462 	return B_UNSUPPORTED;
6463 }
6464 
6465 
6466 static status_t
6467 attr_dir_rewind(struct file_descriptor* descriptor)
6468 {
6469 	struct vnode* vnode = descriptor->u.vnode;
6470 
6471 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6472 
6473 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6474 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6475 
6476 	return B_UNSUPPORTED;
6477 }
6478 
6479 
6480 static int
6481 attr_create(int fd, char* path, const char* name, uint32 type,
6482 	int openMode, bool kernel)
6483 {
6484 	if (name == NULL || *name == '\0')
6485 		return B_BAD_VALUE;
6486 
6487 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6488 	struct vnode* vnode;
6489 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6490 		kernel);
6491 	if (status != B_OK)
6492 		return status;
6493 
6494 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6495 		status = B_LINK_LIMIT;
6496 		goto err;
6497 	}
6498 
6499 	if (!HAS_FS_CALL(vnode, create_attr)) {
6500 		status = B_READ_ONLY_DEVICE;
6501 		goto err;
6502 	}
6503 
6504 	void* cookie;
6505 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6506 	if (status != B_OK)
6507 		goto err;
6508 
6509 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6510 	if (fd >= 0)
6511 		return fd;
6512 
6513 	status = fd;
6514 
6515 	FS_CALL(vnode, close_attr, cookie);
6516 	FS_CALL(vnode, free_attr_cookie, cookie);
6517 
6518 	FS_CALL(vnode, remove_attr, name);
6519 
6520 err:
6521 	put_vnode(vnode);
6522 
6523 	return status;
6524 }
6525 
6526 
6527 static int
6528 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6529 {
6530 	if (name == NULL || *name == '\0')
6531 		return B_BAD_VALUE;
6532 
6533 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6534 	struct vnode* vnode;
6535 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6536 		kernel);
6537 	if (status != B_OK)
6538 		return status;
6539 
6540 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6541 		status = B_LINK_LIMIT;
6542 		goto err;
6543 	}
6544 
6545 	if (!HAS_FS_CALL(vnode, open_attr)) {
6546 		status = B_UNSUPPORTED;
6547 		goto err;
6548 	}
6549 
6550 	void* cookie;
6551 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6552 	if (status != B_OK)
6553 		goto err;
6554 
6555 	// now we only need a file descriptor for this attribute and we're done
6556 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6557 	if (fd >= 0)
6558 		return fd;
6559 
6560 	status = fd;
6561 
6562 	FS_CALL(vnode, close_attr, cookie);
6563 	FS_CALL(vnode, free_attr_cookie, cookie);
6564 
6565 err:
6566 	put_vnode(vnode);
6567 
6568 	return status;
6569 }
6570 
6571 
6572 static status_t
6573 attr_close(struct file_descriptor* descriptor)
6574 {
6575 	struct vnode* vnode = descriptor->u.vnode;
6576 
6577 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6578 
6579 	if (HAS_FS_CALL(vnode, close_attr))
6580 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6581 
6582 	return B_OK;
6583 }
6584 
6585 
6586 static void
6587 attr_free_fd(struct file_descriptor* descriptor)
6588 {
6589 	struct vnode* vnode = descriptor->u.vnode;
6590 
6591 	if (vnode != NULL) {
6592 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6593 		put_vnode(vnode);
6594 	}
6595 }
6596 
6597 
6598 static status_t
6599 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6600 	size_t* length)
6601 {
6602 	struct vnode* vnode = descriptor->u.vnode;
6603 
6604 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6605 		*length));
6606 
6607 	if (!HAS_FS_CALL(vnode, read_attr))
6608 		return B_UNSUPPORTED;
6609 
6610 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6611 }
6612 
6613 
6614 static status_t
6615 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6616 	size_t* length)
6617 {
6618 	struct vnode* vnode = descriptor->u.vnode;
6619 
6620 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6621 	if (!HAS_FS_CALL(vnode, write_attr))
6622 		return B_UNSUPPORTED;
6623 
6624 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6625 }
6626 
6627 
6628 static off_t
6629 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6630 {
6631 	off_t offset;
6632 
6633 	switch (seekType) {
6634 		case SEEK_SET:
6635 			offset = 0;
6636 			break;
6637 		case SEEK_CUR:
6638 			offset = descriptor->pos;
6639 			break;
6640 		case SEEK_END:
6641 		{
6642 			struct vnode* vnode = descriptor->u.vnode;
6643 			if (!HAS_FS_CALL(vnode, read_stat))
6644 				return B_UNSUPPORTED;
6645 
6646 			struct stat stat;
6647 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6648 				&stat);
6649 			if (status != B_OK)
6650 				return status;
6651 
6652 			offset = stat.st_size;
6653 			break;
6654 		}
6655 		default:
6656 			return B_BAD_VALUE;
6657 	}
6658 
6659 	// assumes off_t is 64 bits wide
6660 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6661 		return B_BUFFER_OVERFLOW;
6662 
6663 	pos += offset;
6664 	if (pos < 0)
6665 		return B_BAD_VALUE;
6666 
6667 	return descriptor->pos = pos;
6668 }
6669 
6670 
6671 static status_t
6672 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6673 {
6674 	struct vnode* vnode = descriptor->u.vnode;
6675 
6676 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6677 
6678 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6679 		return B_UNSUPPORTED;
6680 
6681 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6682 }
6683 
6684 
6685 static status_t
6686 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6687 	int statMask)
6688 {
6689 	struct vnode* vnode = descriptor->u.vnode;
6690 
6691 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6692 
6693 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6694 		return B_READ_ONLY_DEVICE;
6695 
6696 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6697 }
6698 
6699 
6700 static status_t
6701 attr_remove(int fd, const char* name, bool kernel)
6702 {
6703 	struct file_descriptor* descriptor;
6704 	struct vnode* vnode;
6705 	status_t status;
6706 
6707 	if (name == NULL || *name == '\0')
6708 		return B_BAD_VALUE;
6709 
6710 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6711 		kernel));
6712 
6713 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6714 	if (descriptor == NULL)
6715 		return B_FILE_ERROR;
6716 
6717 	if (HAS_FS_CALL(vnode, remove_attr))
6718 		status = FS_CALL(vnode, remove_attr, name);
6719 	else
6720 		status = B_READ_ONLY_DEVICE;
6721 
6722 	put_fd(descriptor);
6723 
6724 	return status;
6725 }
6726 
6727 
6728 static status_t
6729 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6730 	bool kernel)
6731 {
6732 	struct file_descriptor* fromDescriptor;
6733 	struct file_descriptor* toDescriptor;
6734 	struct vnode* fromVnode;
6735 	struct vnode* toVnode;
6736 	status_t status;
6737 
6738 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6739 		|| *toName == '\0')
6740 		return B_BAD_VALUE;
6741 
6742 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6743 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6744 
6745 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6746 	if (fromDescriptor == NULL)
6747 		return B_FILE_ERROR;
6748 
6749 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6750 	if (toDescriptor == NULL) {
6751 		status = B_FILE_ERROR;
6752 		goto err;
6753 	}
6754 
6755 	// are the files on the same volume?
6756 	if (fromVnode->device != toVnode->device) {
6757 		status = B_CROSS_DEVICE_LINK;
6758 		goto err1;
6759 	}
6760 
6761 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6762 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6763 	} else
6764 		status = B_READ_ONLY_DEVICE;
6765 
6766 err1:
6767 	put_fd(toDescriptor);
6768 err:
6769 	put_fd(fromDescriptor);
6770 
6771 	return status;
6772 }
6773 
6774 
6775 static int
6776 index_dir_open(dev_t mountID, bool kernel)
6777 {
6778 	struct fs_mount* mount;
6779 	void* cookie;
6780 
6781 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6782 
6783 	status_t status = get_mount(mountID, &mount);
6784 	if (status != B_OK)
6785 		return status;
6786 
6787 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6788 		status = B_UNSUPPORTED;
6789 		goto error;
6790 	}
6791 
6792 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6793 	if (status != B_OK)
6794 		goto error;
6795 
6796 	// get fd for the index directory
6797 	int fd;
6798 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6799 	if (fd >= 0)
6800 		return fd;
6801 
6802 	// something went wrong
6803 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6804 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6805 
6806 	status = fd;
6807 
6808 error:
6809 	put_mount(mount);
6810 	return status;
6811 }
6812 
6813 
6814 static status_t
6815 index_dir_close(struct file_descriptor* descriptor)
6816 {
6817 	struct fs_mount* mount = descriptor->u.mount;
6818 
6819 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6820 
6821 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6822 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6823 
6824 	return B_OK;
6825 }
6826 
6827 
6828 static void
6829 index_dir_free_fd(struct file_descriptor* descriptor)
6830 {
6831 	struct fs_mount* mount = descriptor->u.mount;
6832 
6833 	if (mount != NULL) {
6834 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6835 		put_mount(mount);
6836 	}
6837 }
6838 
6839 
6840 static status_t
6841 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6842 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6843 {
6844 	struct fs_mount* mount = descriptor->u.mount;
6845 
6846 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6847 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6848 			bufferSize, _count);
6849 	}
6850 
6851 	return B_UNSUPPORTED;
6852 }
6853 
6854 
6855 static status_t
6856 index_dir_rewind(struct file_descriptor* descriptor)
6857 {
6858 	struct fs_mount* mount = descriptor->u.mount;
6859 
6860 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6861 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6862 
6863 	return B_UNSUPPORTED;
6864 }
6865 
6866 
6867 static status_t
6868 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6869 	bool kernel)
6870 {
6871 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6872 		name, kernel));
6873 
6874 	struct fs_mount* mount;
6875 	status_t status = get_mount(mountID, &mount);
6876 	if (status != B_OK)
6877 		return status;
6878 
6879 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6880 		status = B_READ_ONLY_DEVICE;
6881 		goto out;
6882 	}
6883 
6884 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6885 
6886 out:
6887 	put_mount(mount);
6888 	return status;
6889 }
6890 
6891 
6892 #if 0
6893 static status_t
6894 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6895 {
6896 	struct vnode* vnode = descriptor->u.vnode;
6897 
6898 	// ToDo: currently unused!
6899 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6900 	if (!HAS_FS_CALL(vnode, read_index_stat))
6901 		return B_UNSUPPORTED;
6902 
6903 	return B_UNSUPPORTED;
6904 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6905 }
6906 
6907 
6908 static void
6909 index_free_fd(struct file_descriptor* descriptor)
6910 {
6911 	struct vnode* vnode = descriptor->u.vnode;
6912 
6913 	if (vnode != NULL) {
6914 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6915 		put_vnode(vnode);
6916 	}
6917 }
6918 #endif
6919 
6920 
6921 static status_t
6922 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6923 	bool kernel)
6924 {
6925 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6926 		name, kernel));
6927 
6928 	struct fs_mount* mount;
6929 	status_t status = get_mount(mountID, &mount);
6930 	if (status != B_OK)
6931 		return status;
6932 
6933 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6934 		status = B_UNSUPPORTED;
6935 		goto out;
6936 	}
6937 
6938 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6939 
6940 out:
6941 	put_mount(mount);
6942 	return status;
6943 }
6944 
6945 
6946 static status_t
6947 index_remove(dev_t mountID, const char* name, bool kernel)
6948 {
6949 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6950 		name, kernel));
6951 
6952 	struct fs_mount* mount;
6953 	status_t status = get_mount(mountID, &mount);
6954 	if (status != B_OK)
6955 		return status;
6956 
6957 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6958 		status = B_READ_ONLY_DEVICE;
6959 		goto out;
6960 	}
6961 
6962 	status = FS_MOUNT_CALL(mount, remove_index, name);
6963 
6964 out:
6965 	put_mount(mount);
6966 	return status;
6967 }
6968 
6969 
6970 /*!	TODO: the query FS API is still the pretty much the same as in R5.
6971 		It would be nice if the FS would find some more kernel support
6972 		for them.
6973 		For example, query parsing should be moved into the kernel.
6974 */
6975 static int
6976 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6977 	int32 token, bool kernel)
6978 {
6979 	struct fs_mount* mount;
6980 	void* cookie;
6981 
6982 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6983 		query, kernel));
6984 
6985 	status_t status = get_mount(device, &mount);
6986 	if (status != B_OK)
6987 		return status;
6988 
6989 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6990 		status = B_UNSUPPORTED;
6991 		goto error;
6992 	}
6993 
6994 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6995 		&cookie);
6996 	if (status != B_OK)
6997 		goto error;
6998 
6999 	// get fd for the index directory
7000 	int fd;
7001 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7002 	if (fd >= 0)
7003 		return fd;
7004 
7005 	status = fd;
7006 
7007 	// something went wrong
7008 	FS_MOUNT_CALL(mount, close_query, cookie);
7009 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7010 
7011 error:
7012 	put_mount(mount);
7013 	return status;
7014 }
7015 
7016 
7017 static status_t
7018 query_close(struct file_descriptor* descriptor)
7019 {
7020 	struct fs_mount* mount = descriptor->u.mount;
7021 
7022 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7023 
7024 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7025 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7026 
7027 	return B_OK;
7028 }
7029 
7030 
7031 static void
7032 query_free_fd(struct file_descriptor* descriptor)
7033 {
7034 	struct fs_mount* mount = descriptor->u.mount;
7035 
7036 	if (mount != NULL) {
7037 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7038 		put_mount(mount);
7039 	}
7040 }
7041 
7042 
7043 static status_t
7044 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7045 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7046 {
7047 	struct fs_mount* mount = descriptor->u.mount;
7048 
7049 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7050 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7051 			bufferSize, _count);
7052 	}
7053 
7054 	return B_UNSUPPORTED;
7055 }
7056 
7057 
7058 static status_t
7059 query_rewind(struct file_descriptor* descriptor)
7060 {
7061 	struct fs_mount* mount = descriptor->u.mount;
7062 
7063 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7064 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7065 
7066 	return B_UNSUPPORTED;
7067 }
7068 
7069 
7070 //	#pragma mark - General File System functions
7071 
7072 
7073 static dev_t
7074 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7075 	const char* args, bool kernel)
7076 {
7077 	struct ::fs_mount* mount;
7078 	status_t status = B_OK;
7079 	fs_volume* volume = NULL;
7080 	int32 layer = 0;
7081 	Vnode* coveredNode = NULL;
7082 
7083 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
7084 
7085 	// The path is always safe, we just have to make sure that fsName is
7086 	// almost valid - we can't make any assumptions about args, though.
7087 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7088 	// We'll get it from the DDM later.
7089 	if (fsName == NULL) {
7090 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7091 			return B_BAD_VALUE;
7092 	} else if (fsName[0] == '\0')
7093 		return B_BAD_VALUE;
7094 
7095 	RecursiveLocker mountOpLocker(sMountOpLock);
7096 
7097 	// Helper to delete a newly created file device on failure.
7098 	// Not exactly beautiful, but helps to keep the code below cleaner.
7099 	struct FileDeviceDeleter {
7100 		FileDeviceDeleter() : id(-1) {}
7101 		~FileDeviceDeleter()
7102 		{
7103 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7104 		}
7105 
7106 		partition_id id;
7107 	} fileDeviceDeleter;
7108 
7109 	// If the file system is not a "virtual" one, the device argument should
7110 	// point to a real file/device (if given at all).
7111 	// get the partition
7112 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7113 	KPartition* partition = NULL;
7114 	KPath normalizedDevice;
7115 	bool newlyCreatedFileDevice = false;
7116 
7117 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7118 		// normalize the device path
7119 		status = normalizedDevice.SetTo(device, true);
7120 		if (status != B_OK)
7121 			return status;
7122 
7123 		// get a corresponding partition from the DDM
7124 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7125 		if (partition == NULL) {
7126 			// Partition not found: This either means, the user supplied
7127 			// an invalid path, or the path refers to an image file. We try
7128 			// to let the DDM create a file device for the path.
7129 			partition_id deviceID = ddm->CreateFileDevice(
7130 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7131 			if (deviceID >= 0) {
7132 				partition = ddm->RegisterPartition(deviceID);
7133 				if (newlyCreatedFileDevice)
7134 					fileDeviceDeleter.id = deviceID;
7135 			}
7136 		}
7137 
7138 		if (!partition) {
7139 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7140 				normalizedDevice.Path()));
7141 			return B_ENTRY_NOT_FOUND;
7142 		}
7143 
7144 		device = normalizedDevice.Path();
7145 			// correct path to file device
7146 	}
7147 	PartitionRegistrar partitionRegistrar(partition, true);
7148 
7149 	// Write lock the partition's device. For the time being, we keep the lock
7150 	// until we're done mounting -- not nice, but ensure, that no-one is
7151 	// interfering.
7152 	// TODO: Just mark the partition busy while mounting!
7153 	KDiskDevice* diskDevice = NULL;
7154 	if (partition) {
7155 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7156 		if (!diskDevice) {
7157 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7158 			return B_ERROR;
7159 		}
7160 	}
7161 
7162 	DeviceWriteLocker writeLocker(diskDevice, true);
7163 		// this takes over the write lock acquired before
7164 
7165 	if (partition != NULL) {
7166 		// make sure, that the partition is not busy
7167 		if (partition->IsBusy()) {
7168 			TRACE(("fs_mount(): Partition is busy.\n"));
7169 			return B_BUSY;
7170 		}
7171 
7172 		// if no FS name had been supplied, we get it from the partition
7173 		if (fsName == NULL) {
7174 			KDiskSystem* diskSystem = partition->DiskSystem();
7175 			if (!diskSystem) {
7176 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7177 					"recognize it.\n"));
7178 				return B_BAD_VALUE;
7179 			}
7180 
7181 			if (!diskSystem->IsFileSystem()) {
7182 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7183 					"partitioning system.\n"));
7184 				return B_BAD_VALUE;
7185 			}
7186 
7187 			// The disk system name will not change, and the KDiskSystem
7188 			// object will not go away while the disk device is locked (and
7189 			// the partition has a reference to it), so this is safe.
7190 			fsName = diskSystem->Name();
7191 		}
7192 	}
7193 
7194 	mount = new(std::nothrow) (struct ::fs_mount);
7195 	if (mount == NULL)
7196 		return B_NO_MEMORY;
7197 
7198 	mount->device_name = strdup(device);
7199 		// "device" can be NULL
7200 
7201 	status = mount->entry_cache.Init();
7202 	if (status != B_OK)
7203 		goto err1;
7204 
7205 	// initialize structure
7206 	mount->id = sNextMountID++;
7207 	mount->partition = NULL;
7208 	mount->root_vnode = NULL;
7209 	mount->covers_vnode = NULL;
7210 	mount->unmounting = false;
7211 	mount->owns_file_device = false;
7212 	mount->volume = NULL;
7213 
7214 	// build up the volume(s)
7215 	while (true) {
7216 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7217 		if (layerFSName == NULL) {
7218 			if (layer == 0) {
7219 				status = B_NO_MEMORY;
7220 				goto err1;
7221 			}
7222 
7223 			break;
7224 		}
7225 
7226 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7227 		if (volume == NULL) {
7228 			status = B_NO_MEMORY;
7229 			free(layerFSName);
7230 			goto err1;
7231 		}
7232 
7233 		volume->id = mount->id;
7234 		volume->partition = partition != NULL ? partition->ID() : -1;
7235 		volume->layer = layer++;
7236 		volume->private_volume = NULL;
7237 		volume->ops = NULL;
7238 		volume->sub_volume = NULL;
7239 		volume->super_volume = NULL;
7240 		volume->file_system = NULL;
7241 		volume->file_system_name = NULL;
7242 
7243 		volume->file_system_name = get_file_system_name(layerFSName);
7244 		if (volume->file_system_name == NULL) {
7245 			status = B_NO_MEMORY;
7246 			free(layerFSName);
7247 			free(volume);
7248 			goto err1;
7249 		}
7250 
7251 		volume->file_system = get_file_system(layerFSName);
7252 		if (volume->file_system == NULL) {
7253 			status = B_DEVICE_NOT_FOUND;
7254 			free(layerFSName);
7255 			free(volume->file_system_name);
7256 			free(volume);
7257 			goto err1;
7258 		}
7259 
7260 		if (mount->volume == NULL)
7261 			mount->volume = volume;
7262 		else {
7263 			volume->super_volume = mount->volume;
7264 			mount->volume->sub_volume = volume;
7265 			mount->volume = volume;
7266 		}
7267 	}
7268 
7269 	// insert mount struct into list before we call FS's mount() function
7270 	// so that vnodes can be created for this mount
7271 	mutex_lock(&sMountMutex);
7272 	hash_insert(sMountsTable, mount);
7273 	mutex_unlock(&sMountMutex);
7274 
7275 	ino_t rootID;
7276 
7277 	if (!sRoot) {
7278 		// we haven't mounted anything yet
7279 		if (strcmp(path, "/") != 0) {
7280 			status = B_ERROR;
7281 			goto err2;
7282 		}
7283 
7284 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7285 			args, &rootID);
7286 		if (status != 0)
7287 			goto err2;
7288 	} else {
7289 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7290 		if (status != B_OK)
7291 			goto err2;
7292 
7293 		mount->covers_vnode = coveredNode;
7294 
7295 		// make sure covered_vnode is a directory
7296 		if (!S_ISDIR(coveredNode->Type())) {
7297 			status = B_NOT_A_DIRECTORY;
7298 			goto err3;
7299 		}
7300 
7301 		if (coveredNode->IsCovered()) {
7302 			// this is already a covered vnode
7303 			status = B_BUSY;
7304 			goto err3;
7305 		}
7306 
7307 		// mount it/them
7308 		fs_volume* volume = mount->volume;
7309 		while (volume) {
7310 			status = volume->file_system->mount(volume, device, flags, args,
7311 				&rootID);
7312 			if (status != B_OK) {
7313 				if (volume->sub_volume)
7314 					goto err4;
7315 				goto err3;
7316 			}
7317 
7318 			volume = volume->super_volume;
7319 		}
7320 
7321 		volume = mount->volume;
7322 		while (volume) {
7323 			if (volume->ops->all_layers_mounted != NULL)
7324 				volume->ops->all_layers_mounted(volume);
7325 			volume = volume->super_volume;
7326 		}
7327 	}
7328 
7329 	// the root node is supposed to be owned by the file system - it must
7330 	// exist at this point
7331 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7332 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7333 		panic("fs_mount: file system does not own its root node!\n");
7334 		status = B_ERROR;
7335 		goto err4;
7336 	}
7337 
7338 	// set up the links between the root vnode and the vnode it covers
7339 	rw_lock_write_lock(&sVnodeLock);
7340 	if (coveredNode != NULL) {
7341 		if (coveredNode->IsCovered()) {
7342 			// the vnode is covered now
7343 			status = B_BUSY;
7344 			rw_lock_write_unlock(&sVnodeLock);
7345 			goto err4;
7346 		}
7347 
7348 		mount->root_vnode->covers = coveredNode;
7349 		mount->root_vnode->SetCovering(true);
7350 
7351 		coveredNode->covered_by = mount->root_vnode;
7352 		coveredNode->SetCovered(true);
7353 	}
7354 	rw_lock_write_unlock(&sVnodeLock);
7355 
7356 	if (!sRoot) {
7357 		sRoot = mount->root_vnode;
7358 		mutex_lock(&sIOContextRootLock);
7359 		get_current_io_context(true)->root = sRoot;
7360 		mutex_unlock(&sIOContextRootLock);
7361 		inc_vnode_ref_count(sRoot);
7362 	}
7363 
7364 	// supply the partition (if any) with the mount cookie and mark it mounted
7365 	if (partition) {
7366 		partition->SetMountCookie(mount->volume->private_volume);
7367 		partition->SetVolumeID(mount->id);
7368 
7369 		// keep a partition reference as long as the partition is mounted
7370 		partitionRegistrar.Detach();
7371 		mount->partition = partition;
7372 		mount->owns_file_device = newlyCreatedFileDevice;
7373 		fileDeviceDeleter.id = -1;
7374 	}
7375 
7376 	notify_mount(mount->id,
7377 		coveredNode != NULL ? coveredNode->device : -1,
7378 		coveredNode ? coveredNode->id : -1);
7379 
7380 	return mount->id;
7381 
7382 err4:
7383 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7384 err3:
7385 	if (coveredNode != NULL)
7386 		put_vnode(coveredNode);
7387 err2:
7388 	mutex_lock(&sMountMutex);
7389 	hash_remove(sMountsTable, mount);
7390 	mutex_unlock(&sMountMutex);
7391 err1:
7392 	delete mount;
7393 
7394 	return status;
7395 }
7396 
7397 
7398 static status_t
7399 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7400 {
7401 	struct fs_mount* mount;
7402 	status_t err;
7403 
7404 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7405 		kernel));
7406 
7407 	struct vnode* pathVnode = NULL;
7408 	if (path != NULL) {
7409 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7410 		if (err != B_OK)
7411 			return B_ENTRY_NOT_FOUND;
7412 	}
7413 
7414 	RecursiveLocker mountOpLocker(sMountOpLock);
7415 
7416 	// this lock is not strictly necessary, but here in case of KDEBUG
7417 	// to keep the ASSERT in find_mount() working.
7418 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7419 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7420 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7421 	if (mount == NULL) {
7422 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7423 			pathVnode);
7424 	}
7425 
7426 	if (path != NULL) {
7427 		put_vnode(pathVnode);
7428 
7429 		if (mount->root_vnode != pathVnode) {
7430 			// not mountpoint
7431 			return B_BAD_VALUE;
7432 		}
7433 	}
7434 
7435 	// if the volume is associated with a partition, lock the device of the
7436 	// partition as long as we are unmounting
7437 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7438 	KPartition* partition = mount->partition;
7439 	KDiskDevice* diskDevice = NULL;
7440 	if (partition != NULL) {
7441 		if (partition->Device() == NULL) {
7442 			dprintf("fs_unmount(): There is no device!\n");
7443 			return B_ERROR;
7444 		}
7445 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7446 		if (!diskDevice) {
7447 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7448 			return B_ERROR;
7449 		}
7450 	}
7451 	DeviceWriteLocker writeLocker(diskDevice, true);
7452 
7453 	// make sure, that the partition is not busy
7454 	if (partition != NULL) {
7455 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7456 			TRACE(("fs_unmount(): Partition is busy.\n"));
7457 			return B_BUSY;
7458 		}
7459 	}
7460 
7461 	// grab the vnode master mutex to keep someone from creating
7462 	// a vnode while we're figuring out if we can continue
7463 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7464 
7465 	bool disconnectedDescriptors = false;
7466 
7467 	while (true) {
7468 		bool busy = false;
7469 
7470 		// cycle through the list of vnodes associated with this mount and
7471 		// make sure all of them are not busy or have refs on them
7472 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7473 		while (struct vnode* vnode = iterator.Next()) {
7474 			if (vnode->IsBusy()) {
7475 				busy = true;
7476 				break;
7477 			}
7478 
7479 			// check the vnode's ref count -- subtract additional references for
7480 			// covering
7481 			int32 refCount = vnode->ref_count;
7482 			if (vnode->covers != NULL)
7483 				refCount--;
7484 			if (vnode->covered_by != NULL)
7485 				refCount--;
7486 
7487 			if (refCount != 0) {
7488 				// there are still vnodes in use on this mount, so we cannot
7489 				// unmount yet
7490 				busy = true;
7491 				break;
7492 			}
7493 		}
7494 
7495 		if (!busy)
7496 			break;
7497 
7498 		if ((flags & B_FORCE_UNMOUNT) == 0)
7499 			return B_BUSY;
7500 
7501 		if (disconnectedDescriptors) {
7502 			// wait a bit until the last access is finished, and then try again
7503 			vnodesWriteLocker.Unlock();
7504 			snooze(100000);
7505 			// TODO: if there is some kind of bug that prevents the ref counts
7506 			// from getting back to zero, this will fall into an endless loop...
7507 			vnodesWriteLocker.Lock();
7508 			continue;
7509 		}
7510 
7511 		// the file system is still busy - but we're forced to unmount it,
7512 		// so let's disconnect all open file descriptors
7513 
7514 		mount->unmounting = true;
7515 			// prevent new vnodes from being created
7516 
7517 		vnodesWriteLocker.Unlock();
7518 
7519 		disconnect_mount_or_vnode_fds(mount, NULL);
7520 		disconnectedDescriptors = true;
7521 
7522 		vnodesWriteLocker.Lock();
7523 	}
7524 
7525 	// We can safely continue. Mark all of the vnodes busy and this mount
7526 	// structure in unmounting state. Also undo the vnode covers/covered_by
7527 	// links.
7528 	mount->unmounting = true;
7529 
7530 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7531 	while (struct vnode* vnode = iterator.Next()) {
7532 		// Remove all covers/covered_by links from other mounts' nodes to this
7533 		// vnode and adjust the node ref count accordingly. We will release the
7534 		// references to the external vnodes below.
7535 		if (Vnode* coveredNode = vnode->covers) {
7536 			if (Vnode* coveringNode = vnode->covered_by) {
7537 				// We have both covered and covering vnodes, so just remove us
7538 				// from the chain.
7539 				coveredNode->covered_by = coveringNode;
7540 				coveringNode->covers = coveredNode;
7541 				vnode->ref_count -= 2;
7542 
7543 				vnode->covered_by = NULL;
7544 				vnode->covers = NULL;
7545 				vnode->SetCovering(false);
7546 				vnode->SetCovered(false);
7547 			} else {
7548 				// We only have a covered vnode. Remove its link to us.
7549 				coveredNode->covered_by = NULL;
7550 				coveredNode->SetCovered(false);
7551 				vnode->ref_count--;
7552 
7553 				// If the other node is an external vnode, we keep its link
7554 				// link around so we can put the reference later on. Otherwise
7555 				// we get rid of it right now.
7556 				if (coveredNode->mount == mount) {
7557 					vnode->covers = NULL;
7558 					coveredNode->ref_count--;
7559 				}
7560 			}
7561 		} else if (Vnode* coveringNode = vnode->covered_by) {
7562 			// We only have a covering vnode. Remove its link to us.
7563 			coveringNode->covers = NULL;
7564 			coveringNode->SetCovering(false);
7565 			vnode->ref_count--;
7566 
7567 			// If the other node is an external vnode, we keep its link
7568 			// link around so we can put the reference later on. Otherwise
7569 			// we get rid of it right now.
7570 			if (coveringNode->mount == mount) {
7571 				vnode->covered_by = NULL;
7572 				coveringNode->ref_count--;
7573 			}
7574 		}
7575 
7576 		vnode->SetBusy(true);
7577 		vnode_to_be_freed(vnode);
7578 	}
7579 
7580 	vnodesWriteLocker.Unlock();
7581 
7582 	// Free all vnodes associated with this mount.
7583 	// They will be removed from the mount list by free_vnode(), so
7584 	// we don't have to do this.
7585 	while (struct vnode* vnode = mount->vnodes.Head()) {
7586 		// Put the references to external covered/covering vnodes we kept above.
7587 		if (Vnode* coveredNode = vnode->covers)
7588 			put_vnode(coveredNode);
7589 		if (Vnode* coveringNode = vnode->covered_by)
7590 			put_vnode(coveringNode);
7591 
7592 		free_vnode(vnode, false);
7593 	}
7594 
7595 	// remove the mount structure from the hash table
7596 	mutex_lock(&sMountMutex);
7597 	hash_remove(sMountsTable, mount);
7598 	mutex_unlock(&sMountMutex);
7599 
7600 	mountOpLocker.Unlock();
7601 
7602 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7603 	notify_unmount(mount->id);
7604 
7605 	// dereference the partition and mark it unmounted
7606 	if (partition) {
7607 		partition->SetVolumeID(-1);
7608 		partition->SetMountCookie(NULL);
7609 
7610 		if (mount->owns_file_device)
7611 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7612 		partition->Unregister();
7613 	}
7614 
7615 	delete mount;
7616 	return B_OK;
7617 }
7618 
7619 
7620 static status_t
7621 fs_sync(dev_t device)
7622 {
7623 	struct fs_mount* mount;
7624 	status_t status = get_mount(device, &mount);
7625 	if (status != B_OK)
7626 		return status;
7627 
7628 	struct vnode marker;
7629 	memset(&marker, 0, sizeof(marker));
7630 	marker.SetBusy(true);
7631 	marker.SetRemoved(true);
7632 
7633 	// First, synchronize all file caches
7634 
7635 	while (true) {
7636 		WriteLocker locker(sVnodeLock);
7637 			// Note: That's the easy way. Which is probably OK for sync(),
7638 			// since it's a relatively rare call and doesn't need to allow for
7639 			// a lot of concurrency. Using a read lock would be possible, but
7640 			// also more involved, since we had to lock the individual nodes
7641 			// and take care of the locking order, which we might not want to
7642 			// do while holding fs_mount::rlock.
7643 
7644 		// synchronize access to vnode list
7645 		recursive_lock_lock(&mount->rlock);
7646 
7647 		struct vnode* vnode;
7648 		if (!marker.IsRemoved()) {
7649 			vnode = mount->vnodes.GetNext(&marker);
7650 			mount->vnodes.Remove(&marker);
7651 			marker.SetRemoved(true);
7652 		} else
7653 			vnode = mount->vnodes.First();
7654 
7655 		while (vnode != NULL && (vnode->cache == NULL
7656 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7657 			// TODO: we could track writes (and writable mapped vnodes)
7658 			//	and have a simple flag that we could test for here
7659 			vnode = mount->vnodes.GetNext(vnode);
7660 		}
7661 
7662 		if (vnode != NULL) {
7663 			// insert marker vnode again
7664 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7665 			marker.SetRemoved(false);
7666 		}
7667 
7668 		recursive_lock_unlock(&mount->rlock);
7669 
7670 		if (vnode == NULL)
7671 			break;
7672 
7673 		vnode = lookup_vnode(mount->id, vnode->id);
7674 		if (vnode == NULL || vnode->IsBusy())
7675 			continue;
7676 
7677 		if (vnode->ref_count == 0) {
7678 			// this vnode has been unused before
7679 			vnode_used(vnode);
7680 		}
7681 		inc_vnode_ref_count(vnode);
7682 
7683 		locker.Unlock();
7684 
7685 		if (vnode->cache != NULL && !vnode->IsRemoved())
7686 			vnode->cache->WriteModified();
7687 
7688 		put_vnode(vnode);
7689 	}
7690 
7691 	// And then, let the file systems do their synchronizing work
7692 
7693 	if (HAS_FS_MOUNT_CALL(mount, sync))
7694 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7695 
7696 	put_mount(mount);
7697 	return status;
7698 }
7699 
7700 
7701 static status_t
7702 fs_read_info(dev_t device, struct fs_info* info)
7703 {
7704 	struct fs_mount* mount;
7705 	status_t status = get_mount(device, &mount);
7706 	if (status != B_OK)
7707 		return status;
7708 
7709 	memset(info, 0, sizeof(struct fs_info));
7710 
7711 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7712 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7713 
7714 	// fill in info the file system doesn't (have to) know about
7715 	if (status == B_OK) {
7716 		info->dev = mount->id;
7717 		info->root = mount->root_vnode->id;
7718 
7719 		fs_volume* volume = mount->volume;
7720 		while (volume->super_volume != NULL)
7721 			volume = volume->super_volume;
7722 
7723 		strlcpy(info->fsh_name, volume->file_system_name,
7724 			sizeof(info->fsh_name));
7725 		if (mount->device_name != NULL) {
7726 			strlcpy(info->device_name, mount->device_name,
7727 				sizeof(info->device_name));
7728 		}
7729 	}
7730 
7731 	// if the call is not supported by the file system, there are still
7732 	// the parts that we filled out ourselves
7733 
7734 	put_mount(mount);
7735 	return status;
7736 }
7737 
7738 
7739 static status_t
7740 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7741 {
7742 	struct fs_mount* mount;
7743 	status_t status = get_mount(device, &mount);
7744 	if (status != B_OK)
7745 		return status;
7746 
7747 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7748 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7749 	else
7750 		status = B_READ_ONLY_DEVICE;
7751 
7752 	put_mount(mount);
7753 	return status;
7754 }
7755 
7756 
7757 static dev_t
7758 fs_next_device(int32* _cookie)
7759 {
7760 	struct fs_mount* mount = NULL;
7761 	dev_t device = *_cookie;
7762 
7763 	mutex_lock(&sMountMutex);
7764 
7765 	// Since device IDs are assigned sequentially, this algorithm
7766 	// does work good enough. It makes sure that the device list
7767 	// returned is sorted, and that no device is skipped when an
7768 	// already visited device got unmounted.
7769 
7770 	while (device < sNextMountID) {
7771 		mount = find_mount(device++);
7772 		if (mount != NULL && mount->volume->private_volume != NULL)
7773 			break;
7774 	}
7775 
7776 	*_cookie = device;
7777 
7778 	if (mount != NULL)
7779 		device = mount->id;
7780 	else
7781 		device = B_BAD_VALUE;
7782 
7783 	mutex_unlock(&sMountMutex);
7784 
7785 	return device;
7786 }
7787 
7788 
7789 ssize_t
7790 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7791 	void *buffer, size_t readBytes)
7792 {
7793 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7794 	if (attrFD < 0)
7795 		return attrFD;
7796 
7797 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7798 
7799 	_kern_close(attrFD);
7800 
7801 	return bytesRead;
7802 }
7803 
7804 
7805 static status_t
7806 get_cwd(char* buffer, size_t size, bool kernel)
7807 {
7808 	// Get current working directory from io context
7809 	struct io_context* context = get_current_io_context(kernel);
7810 	status_t status;
7811 
7812 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7813 
7814 	mutex_lock(&context->io_mutex);
7815 
7816 	struct vnode* vnode = context->cwd;
7817 	if (vnode)
7818 		inc_vnode_ref_count(vnode);
7819 
7820 	mutex_unlock(&context->io_mutex);
7821 
7822 	if (vnode) {
7823 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7824 		put_vnode(vnode);
7825 	} else
7826 		status = B_ERROR;
7827 
7828 	return status;
7829 }
7830 
7831 
7832 static status_t
7833 set_cwd(int fd, char* path, bool kernel)
7834 {
7835 	struct io_context* context;
7836 	struct vnode* vnode = NULL;
7837 	struct vnode* oldDirectory;
7838 	status_t status;
7839 
7840 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7841 
7842 	// Get vnode for passed path, and bail if it failed
7843 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7844 	if (status < 0)
7845 		return status;
7846 
7847 	if (!S_ISDIR(vnode->Type())) {
7848 		// nope, can't cwd to here
7849 		status = B_NOT_A_DIRECTORY;
7850 		goto err;
7851 	}
7852 
7853 	// Get current io context and lock
7854 	context = get_current_io_context(kernel);
7855 	mutex_lock(&context->io_mutex);
7856 
7857 	// save the old current working directory first
7858 	oldDirectory = context->cwd;
7859 	context->cwd = vnode;
7860 
7861 	mutex_unlock(&context->io_mutex);
7862 
7863 	if (oldDirectory)
7864 		put_vnode(oldDirectory);
7865 
7866 	return B_NO_ERROR;
7867 
7868 err:
7869 	put_vnode(vnode);
7870 	return status;
7871 }
7872 
7873 
7874 //	#pragma mark - kernel mirrored syscalls
7875 
7876 
7877 dev_t
7878 _kern_mount(const char* path, const char* device, const char* fsName,
7879 	uint32 flags, const char* args, size_t argsLength)
7880 {
7881 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7882 	if (pathBuffer.InitCheck() != B_OK)
7883 		return B_NO_MEMORY;
7884 
7885 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7886 }
7887 
7888 
7889 status_t
7890 _kern_unmount(const char* path, uint32 flags)
7891 {
7892 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7893 	if (pathBuffer.InitCheck() != B_OK)
7894 		return B_NO_MEMORY;
7895 
7896 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7897 }
7898 
7899 
7900 status_t
7901 _kern_read_fs_info(dev_t device, struct fs_info* info)
7902 {
7903 	if (info == NULL)
7904 		return B_BAD_VALUE;
7905 
7906 	return fs_read_info(device, info);
7907 }
7908 
7909 
7910 status_t
7911 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7912 {
7913 	if (info == NULL)
7914 		return B_BAD_VALUE;
7915 
7916 	return fs_write_info(device, info, mask);
7917 }
7918 
7919 
7920 status_t
7921 _kern_sync(void)
7922 {
7923 	// Note: _kern_sync() is also called from _user_sync()
7924 	int32 cookie = 0;
7925 	dev_t device;
7926 	while ((device = next_dev(&cookie)) >= 0) {
7927 		status_t status = fs_sync(device);
7928 		if (status != B_OK && status != B_BAD_VALUE) {
7929 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7930 				strerror(status));
7931 		}
7932 	}
7933 
7934 	return B_OK;
7935 }
7936 
7937 
7938 dev_t
7939 _kern_next_device(int32* _cookie)
7940 {
7941 	return fs_next_device(_cookie);
7942 }
7943 
7944 
7945 status_t
7946 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7947 	size_t infoSize)
7948 {
7949 	if (infoSize != sizeof(fd_info))
7950 		return B_BAD_VALUE;
7951 
7952 	// get the team
7953 	Team* team = Team::Get(teamID);
7954 	if (team == NULL)
7955 		return B_BAD_TEAM_ID;
7956 	BReference<Team> teamReference(team, true);
7957 
7958 	// now that we have a team reference, its I/O context won't go away
7959 	io_context* context = team->io_context;
7960 	MutexLocker contextLocker(context->io_mutex);
7961 
7962 	uint32 slot = *_cookie;
7963 
7964 	struct file_descriptor* descriptor;
7965 	while (slot < context->table_size
7966 		&& (descriptor = context->fds[slot]) == NULL) {
7967 		slot++;
7968 	}
7969 
7970 	if (slot >= context->table_size)
7971 		return B_ENTRY_NOT_FOUND;
7972 
7973 	info->number = slot;
7974 	info->open_mode = descriptor->open_mode;
7975 
7976 	struct vnode* vnode = fd_vnode(descriptor);
7977 	if (vnode != NULL) {
7978 		info->device = vnode->device;
7979 		info->node = vnode->id;
7980 	} else if (descriptor->u.mount != NULL) {
7981 		info->device = descriptor->u.mount->id;
7982 		info->node = -1;
7983 	}
7984 
7985 	*_cookie = slot + 1;
7986 	return B_OK;
7987 }
7988 
7989 
7990 int
7991 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7992 	int perms)
7993 {
7994 	if ((openMode & O_CREAT) != 0) {
7995 		return file_create_entry_ref(device, inode, name, openMode, perms,
7996 			true);
7997 	}
7998 
7999 	return file_open_entry_ref(device, inode, name, openMode, true);
8000 }
8001 
8002 
8003 /*!	\brief Opens a node specified by a FD + path pair.
8004 
8005 	At least one of \a fd and \a path must be specified.
8006 	If only \a fd is given, the function opens the node identified by this
8007 	FD. If only a path is given, this path is opened. If both are given and
8008 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8009 	of the directory (!) identified by \a fd.
8010 
8011 	\param fd The FD. May be < 0.
8012 	\param path The absolute or relative path. May be \c NULL.
8013 	\param openMode The open mode.
8014 	\return A FD referring to the newly opened node, or an error code,
8015 			if an error occurs.
8016 */
8017 int
8018 _kern_open(int fd, const char* path, int openMode, int perms)
8019 {
8020 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8021 	if (pathBuffer.InitCheck() != B_OK)
8022 		return B_NO_MEMORY;
8023 
8024 	if (openMode & O_CREAT)
8025 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8026 
8027 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8028 }
8029 
8030 
8031 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8032 
8033 	The supplied name may be \c NULL, in which case directory identified
8034 	by \a device and \a inode will be opened. Otherwise \a device and
8035 	\a inode identify the parent directory of the directory to be opened
8036 	and \a name its entry name.
8037 
8038 	\param device If \a name is specified the ID of the device the parent
8039 		   directory of the directory to be opened resides on, otherwise
8040 		   the device of the directory itself.
8041 	\param inode If \a name is specified the node ID of the parent
8042 		   directory of the directory to be opened, otherwise node ID of the
8043 		   directory itself.
8044 	\param name The entry name of the directory to be opened. If \c NULL,
8045 		   the \a device + \a inode pair identify the node to be opened.
8046 	\return The FD of the newly opened directory or an error code, if
8047 			something went wrong.
8048 */
8049 int
8050 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8051 {
8052 	return dir_open_entry_ref(device, inode, name, true);
8053 }
8054 
8055 
8056 /*!	\brief Opens a directory specified by a FD + path pair.
8057 
8058 	At least one of \a fd and \a path must be specified.
8059 	If only \a fd is given, the function opens the directory identified by this
8060 	FD. If only a path is given, this path is opened. If both are given and
8061 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8062 	of the directory (!) identified by \a fd.
8063 
8064 	\param fd The FD. May be < 0.
8065 	\param path The absolute or relative path. May be \c NULL.
8066 	\return A FD referring to the newly opened directory, or an error code,
8067 			if an error occurs.
8068 */
8069 int
8070 _kern_open_dir(int fd, const char* path)
8071 {
8072 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8073 	if (pathBuffer.InitCheck() != B_OK)
8074 		return B_NO_MEMORY;
8075 
8076 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8077 }
8078 
8079 
8080 status_t
8081 _kern_fcntl(int fd, int op, uint32 argument)
8082 {
8083 	return common_fcntl(fd, op, argument, true);
8084 }
8085 
8086 
8087 status_t
8088 _kern_fsync(int fd)
8089 {
8090 	return common_sync(fd, true);
8091 }
8092 
8093 
8094 status_t
8095 _kern_lock_node(int fd)
8096 {
8097 	return common_lock_node(fd, true);
8098 }
8099 
8100 
8101 status_t
8102 _kern_unlock_node(int fd)
8103 {
8104 	return common_unlock_node(fd, true);
8105 }
8106 
8107 
8108 status_t
8109 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8110 	int perms)
8111 {
8112 	return dir_create_entry_ref(device, inode, name, perms, true);
8113 }
8114 
8115 
8116 /*!	\brief Creates a directory specified by a FD + path pair.
8117 
8118 	\a path must always be specified (it contains the name of the new directory
8119 	at least). If only a path is given, this path identifies the location at
8120 	which the directory shall be created. If both \a fd and \a path are given
8121 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8122 	of the directory (!) identified by \a fd.
8123 
8124 	\param fd The FD. May be < 0.
8125 	\param path The absolute or relative path. Must not be \c NULL.
8126 	\param perms The access permissions the new directory shall have.
8127 	\return \c B_OK, if the directory has been created successfully, another
8128 			error code otherwise.
8129 */
8130 status_t
8131 _kern_create_dir(int fd, const char* path, int perms)
8132 {
8133 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8134 	if (pathBuffer.InitCheck() != B_OK)
8135 		return B_NO_MEMORY;
8136 
8137 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8138 }
8139 
8140 
8141 status_t
8142 _kern_remove_dir(int fd, const char* path)
8143 {
8144 	if (path) {
8145 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8146 		if (pathBuffer.InitCheck() != B_OK)
8147 			return B_NO_MEMORY;
8148 
8149 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8150 	}
8151 
8152 	return dir_remove(fd, NULL, true);
8153 }
8154 
8155 
8156 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8157 
8158 	At least one of \a fd and \a path must be specified.
8159 	If only \a fd is given, the function the symlink to be read is the node
8160 	identified by this FD. If only a path is given, this path identifies the
8161 	symlink to be read. If both are given and the path is absolute, \a fd is
8162 	ignored; a relative path is reckoned off of the directory (!) identified
8163 	by \a fd.
8164 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8165 	will still be updated to reflect the required buffer size.
8166 
8167 	\param fd The FD. May be < 0.
8168 	\param path The absolute or relative path. May be \c NULL.
8169 	\param buffer The buffer into which the contents of the symlink shall be
8170 		   written.
8171 	\param _bufferSize A pointer to the size of the supplied buffer.
8172 	\return The length of the link on success or an appropriate error code
8173 */
8174 status_t
8175 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8176 {
8177 	if (path) {
8178 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8179 		if (pathBuffer.InitCheck() != B_OK)
8180 			return B_NO_MEMORY;
8181 
8182 		return common_read_link(fd, pathBuffer.LockBuffer(),
8183 			buffer, _bufferSize, true);
8184 	}
8185 
8186 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8187 }
8188 
8189 
8190 /*!	\brief Creates a symlink specified by a FD + path pair.
8191 
8192 	\a path must always be specified (it contains the name of the new symlink
8193 	at least). If only a path is given, this path identifies the location at
8194 	which the symlink shall be created. If both \a fd and \a path are given and
8195 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8196 	of the directory (!) identified by \a fd.
8197 
8198 	\param fd The FD. May be < 0.
8199 	\param toPath The absolute or relative path. Must not be \c NULL.
8200 	\param mode The access permissions the new symlink shall have.
8201 	\return \c B_OK, if the symlink has been created successfully, another
8202 			error code otherwise.
8203 */
8204 status_t
8205 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8206 {
8207 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8208 	if (pathBuffer.InitCheck() != B_OK)
8209 		return B_NO_MEMORY;
8210 
8211 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8212 		toPath, mode, true);
8213 }
8214 
8215 
8216 status_t
8217 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8218 	bool traverseLeafLink)
8219 {
8220 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8221 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8222 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8223 		return B_NO_MEMORY;
8224 
8225 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8226 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8227 }
8228 
8229 
8230 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8231 
8232 	\a path must always be specified (it contains at least the name of the entry
8233 	to be deleted). If only a path is given, this path identifies the entry
8234 	directly. If both \a fd and \a path are given and the path is absolute,
8235 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8236 	identified by \a fd.
8237 
8238 	\param fd The FD. May be < 0.
8239 	\param path The absolute or relative path. Must not be \c NULL.
8240 	\return \c B_OK, if the entry has been removed successfully, another
8241 			error code otherwise.
8242 */
8243 status_t
8244 _kern_unlink(int fd, const char* path)
8245 {
8246 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8247 	if (pathBuffer.InitCheck() != B_OK)
8248 		return B_NO_MEMORY;
8249 
8250 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8251 }
8252 
8253 
8254 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
8255 		   by another FD + path pair.
8256 
8257 	\a oldPath and \a newPath must always be specified (they contain at least
8258 	the name of the entry). If only a path is given, this path identifies the
8259 	entry directly. If both a FD and a path are given and the path is absolute,
8260 	the FD is ignored; a relative path is reckoned off of the directory (!)
8261 	identified by the respective FD.
8262 
8263 	\param oldFD The FD of the old location. May be < 0.
8264 	\param oldPath The absolute or relative path of the old location. Must not
8265 		   be \c NULL.
8266 	\param newFD The FD of the new location. May be < 0.
8267 	\param newPath The absolute or relative path of the new location. Must not
8268 		   be \c NULL.
8269 	\return \c B_OK, if the entry has been moved successfully, another
8270 			error code otherwise.
8271 */
8272 status_t
8273 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8274 {
8275 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8276 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8277 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8278 		return B_NO_MEMORY;
8279 
8280 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8281 		newFD, newPathBuffer.LockBuffer(), true);
8282 }
8283 
8284 
8285 status_t
8286 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8287 {
8288 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8289 	if (pathBuffer.InitCheck() != B_OK)
8290 		return B_NO_MEMORY;
8291 
8292 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8293 		true);
8294 }
8295 
8296 
8297 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8298 
8299 	If only \a fd is given, the stat operation associated with the type
8300 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8301 	given, this path identifies the entry for whose node to retrieve the
8302 	stat data. If both \a fd and \a path are given and the path is absolute,
8303 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8304 	identified by \a fd and specifies the entry whose stat data shall be
8305 	retrieved.
8306 
8307 	\param fd The FD. May be < 0.
8308 	\param path The absolute or relative path. Must not be \c NULL.
8309 	\param traverseLeafLink If \a path is given, \c true specifies that the
8310 		   function shall not stick to symlinks, but traverse them.
8311 	\param stat The buffer the stat data shall be written into.
8312 	\param statSize The size of the supplied stat buffer.
8313 	\return \c B_OK, if the the stat data have been read successfully, another
8314 			error code otherwise.
8315 */
8316 status_t
8317 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8318 	struct stat* stat, size_t statSize)
8319 {
8320 	struct stat completeStat;
8321 	struct stat* originalStat = NULL;
8322 	status_t status;
8323 
8324 	if (statSize > sizeof(struct stat))
8325 		return B_BAD_VALUE;
8326 
8327 	// this supports different stat extensions
8328 	if (statSize < sizeof(struct stat)) {
8329 		originalStat = stat;
8330 		stat = &completeStat;
8331 	}
8332 
8333 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8334 
8335 	if (status == B_OK && originalStat != NULL)
8336 		memcpy(originalStat, stat, statSize);
8337 
8338 	return status;
8339 }
8340 
8341 
8342 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8343 
8344 	If only \a fd is given, the stat operation associated with the type
8345 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8346 	given, this path identifies the entry for whose node to write the
8347 	stat data. If both \a fd and \a path are given and the path is absolute,
8348 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8349 	identified by \a fd and specifies the entry whose stat data shall be
8350 	written.
8351 
8352 	\param fd The FD. May be < 0.
8353 	\param path The absolute or relative path. Must not be \c NULL.
8354 	\param traverseLeafLink If \a path is given, \c true specifies that the
8355 		   function shall not stick to symlinks, but traverse them.
8356 	\param stat The buffer containing the stat data to be written.
8357 	\param statSize The size of the supplied stat buffer.
8358 	\param statMask A mask specifying which parts of the stat data shall be
8359 		   written.
8360 	\return \c B_OK, if the the stat data have been written successfully,
8361 			another error code otherwise.
8362 */
8363 status_t
8364 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8365 	const struct stat* stat, size_t statSize, int statMask)
8366 {
8367 	struct stat completeStat;
8368 
8369 	if (statSize > sizeof(struct stat))
8370 		return B_BAD_VALUE;
8371 
8372 	// this supports different stat extensions
8373 	if (statSize < sizeof(struct stat)) {
8374 		memset((uint8*)&completeStat + statSize, 0,
8375 			sizeof(struct stat) - statSize);
8376 		memcpy(&completeStat, stat, statSize);
8377 		stat = &completeStat;
8378 	}
8379 
8380 	status_t status;
8381 
8382 	if (path) {
8383 		// path given: write the stat of the node referred to by (fd, path)
8384 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8385 		if (pathBuffer.InitCheck() != B_OK)
8386 			return B_NO_MEMORY;
8387 
8388 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8389 			traverseLeafLink, stat, statMask, true);
8390 	} else {
8391 		// no path given: get the FD and use the FD operation
8392 		struct file_descriptor* descriptor
8393 			= get_fd(get_current_io_context(true), fd);
8394 		if (descriptor == NULL)
8395 			return B_FILE_ERROR;
8396 
8397 		if (descriptor->ops->fd_write_stat)
8398 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8399 		else
8400 			status = B_UNSUPPORTED;
8401 
8402 		put_fd(descriptor);
8403 	}
8404 
8405 	return status;
8406 }
8407 
8408 
8409 int
8410 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8411 {
8412 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8413 	if (pathBuffer.InitCheck() != B_OK)
8414 		return B_NO_MEMORY;
8415 
8416 	if (path != NULL)
8417 		pathBuffer.SetTo(path);
8418 
8419 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8420 		traverseLeafLink, true);
8421 }
8422 
8423 
8424 int
8425 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8426 	int openMode)
8427 {
8428 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8429 	if (pathBuffer.InitCheck() != B_OK)
8430 		return B_NO_MEMORY;
8431 
8432 	if ((openMode & O_CREAT) != 0) {
8433 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8434 			true);
8435 	}
8436 
8437 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8438 }
8439 
8440 
8441 status_t
8442 _kern_remove_attr(int fd, const char* name)
8443 {
8444 	return attr_remove(fd, name, true);
8445 }
8446 
8447 
8448 status_t
8449 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8450 	const char* toName)
8451 {
8452 	return attr_rename(fromFile, fromName, toFile, toName, true);
8453 }
8454 
8455 
8456 int
8457 _kern_open_index_dir(dev_t device)
8458 {
8459 	return index_dir_open(device, true);
8460 }
8461 
8462 
8463 status_t
8464 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8465 {
8466 	return index_create(device, name, type, flags, true);
8467 }
8468 
8469 
8470 status_t
8471 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8472 {
8473 	return index_name_read_stat(device, name, stat, true);
8474 }
8475 
8476 
8477 status_t
8478 _kern_remove_index(dev_t device, const char* name)
8479 {
8480 	return index_remove(device, name, true);
8481 }
8482 
8483 
8484 status_t
8485 _kern_getcwd(char* buffer, size_t size)
8486 {
8487 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8488 
8489 	// Call vfs to get current working directory
8490 	return get_cwd(buffer, size, true);
8491 }
8492 
8493 
8494 status_t
8495 _kern_setcwd(int fd, const char* path)
8496 {
8497 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8498 	if (pathBuffer.InitCheck() != B_OK)
8499 		return B_NO_MEMORY;
8500 
8501 	if (path != NULL)
8502 		pathBuffer.SetTo(path);
8503 
8504 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8505 }
8506 
8507 
8508 //	#pragma mark - userland syscalls
8509 
8510 
8511 dev_t
8512 _user_mount(const char* userPath, const char* userDevice,
8513 	const char* userFileSystem, uint32 flags, const char* userArgs,
8514 	size_t argsLength)
8515 {
8516 	char fileSystem[B_FILE_NAME_LENGTH];
8517 	KPath path, device;
8518 	char* args = NULL;
8519 	status_t status;
8520 
8521 	if (!IS_USER_ADDRESS(userPath)
8522 		|| !IS_USER_ADDRESS(userFileSystem)
8523 		|| !IS_USER_ADDRESS(userDevice))
8524 		return B_BAD_ADDRESS;
8525 
8526 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8527 		return B_NO_MEMORY;
8528 
8529 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8530 		return B_BAD_ADDRESS;
8531 
8532 	if (userFileSystem != NULL
8533 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8534 		return B_BAD_ADDRESS;
8535 
8536 	if (userDevice != NULL
8537 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8538 			< B_OK)
8539 		return B_BAD_ADDRESS;
8540 
8541 	if (userArgs != NULL && argsLength > 0) {
8542 		// this is a safety restriction
8543 		if (argsLength >= 65536)
8544 			return B_NAME_TOO_LONG;
8545 
8546 		args = (char*)malloc(argsLength + 1);
8547 		if (args == NULL)
8548 			return B_NO_MEMORY;
8549 
8550 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8551 			free(args);
8552 			return B_BAD_ADDRESS;
8553 		}
8554 	}
8555 	path.UnlockBuffer();
8556 	device.UnlockBuffer();
8557 
8558 	status = fs_mount(path.LockBuffer(),
8559 		userDevice != NULL ? device.Path() : NULL,
8560 		userFileSystem ? fileSystem : NULL, flags, args, false);
8561 
8562 	free(args);
8563 	return status;
8564 }
8565 
8566 
8567 status_t
8568 _user_unmount(const char* userPath, uint32 flags)
8569 {
8570 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8571 	if (pathBuffer.InitCheck() != B_OK)
8572 		return B_NO_MEMORY;
8573 
8574 	char* path = pathBuffer.LockBuffer();
8575 
8576 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8577 		return B_BAD_ADDRESS;
8578 
8579 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8580 }
8581 
8582 
8583 status_t
8584 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8585 {
8586 	struct fs_info info;
8587 	status_t status;
8588 
8589 	if (userInfo == NULL)
8590 		return B_BAD_VALUE;
8591 
8592 	if (!IS_USER_ADDRESS(userInfo))
8593 		return B_BAD_ADDRESS;
8594 
8595 	status = fs_read_info(device, &info);
8596 	if (status != B_OK)
8597 		return status;
8598 
8599 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8600 		return B_BAD_ADDRESS;
8601 
8602 	return B_OK;
8603 }
8604 
8605 
8606 status_t
8607 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8608 {
8609 	struct fs_info info;
8610 
8611 	if (userInfo == NULL)
8612 		return B_BAD_VALUE;
8613 
8614 	if (!IS_USER_ADDRESS(userInfo)
8615 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8616 		return B_BAD_ADDRESS;
8617 
8618 	return fs_write_info(device, &info, mask);
8619 }
8620 
8621 
8622 dev_t
8623 _user_next_device(int32* _userCookie)
8624 {
8625 	int32 cookie;
8626 	dev_t device;
8627 
8628 	if (!IS_USER_ADDRESS(_userCookie)
8629 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8630 		return B_BAD_ADDRESS;
8631 
8632 	device = fs_next_device(&cookie);
8633 
8634 	if (device >= B_OK) {
8635 		// update user cookie
8636 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8637 			return B_BAD_ADDRESS;
8638 	}
8639 
8640 	return device;
8641 }
8642 
8643 
8644 status_t
8645 _user_sync(void)
8646 {
8647 	return _kern_sync();
8648 }
8649 
8650 
8651 status_t
8652 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8653 	size_t infoSize)
8654 {
8655 	struct fd_info info;
8656 	uint32 cookie;
8657 
8658 	// only root can do this (or should root's group be enough?)
8659 	if (geteuid() != 0)
8660 		return B_NOT_ALLOWED;
8661 
8662 	if (infoSize != sizeof(fd_info))
8663 		return B_BAD_VALUE;
8664 
8665 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8666 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8667 		return B_BAD_ADDRESS;
8668 
8669 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8670 	if (status != B_OK)
8671 		return status;
8672 
8673 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8674 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8675 		return B_BAD_ADDRESS;
8676 
8677 	return status;
8678 }
8679 
8680 
8681 status_t
8682 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8683 	char* userPath, size_t pathLength)
8684 {
8685 	if (!IS_USER_ADDRESS(userPath))
8686 		return B_BAD_ADDRESS;
8687 
8688 	KPath path(B_PATH_NAME_LENGTH + 1);
8689 	if (path.InitCheck() != B_OK)
8690 		return B_NO_MEMORY;
8691 
8692 	// copy the leaf name onto the stack
8693 	char stackLeaf[B_FILE_NAME_LENGTH];
8694 	if (leaf) {
8695 		if (!IS_USER_ADDRESS(leaf))
8696 			return B_BAD_ADDRESS;
8697 
8698 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8699 		if (length < 0)
8700 			return length;
8701 		if (length >= B_FILE_NAME_LENGTH)
8702 			return B_NAME_TOO_LONG;
8703 
8704 		leaf = stackLeaf;
8705 	}
8706 
8707 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8708 		path.LockBuffer(), path.BufferSize());
8709 	if (status != B_OK)
8710 		return status;
8711 
8712 	path.UnlockBuffer();
8713 
8714 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8715 	if (length < 0)
8716 		return length;
8717 	if (length >= (int)pathLength)
8718 		return B_BUFFER_OVERFLOW;
8719 
8720 	return B_OK;
8721 }
8722 
8723 
8724 status_t
8725 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8726 {
8727 	if (userPath == NULL || buffer == NULL)
8728 		return B_BAD_VALUE;
8729 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8730 		return B_BAD_ADDRESS;
8731 
8732 	// copy path from userland
8733 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8734 	if (pathBuffer.InitCheck() != B_OK)
8735 		return B_NO_MEMORY;
8736 	char* path = pathBuffer.LockBuffer();
8737 
8738 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8739 		return B_BAD_ADDRESS;
8740 
8741 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8742 		false);
8743 	if (error != B_OK)
8744 		return error;
8745 
8746 	// copy back to userland
8747 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8748 	if (len < 0)
8749 		return len;
8750 	if (len >= B_PATH_NAME_LENGTH)
8751 		return B_BUFFER_OVERFLOW;
8752 
8753 	return B_OK;
8754 }
8755 
8756 
8757 int
8758 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8759 	int openMode, int perms)
8760 {
8761 	char name[B_FILE_NAME_LENGTH];
8762 
8763 	if (userName == NULL || device < 0 || inode < 0)
8764 		return B_BAD_VALUE;
8765 	if (!IS_USER_ADDRESS(userName)
8766 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8767 		return B_BAD_ADDRESS;
8768 
8769 	if ((openMode & O_CREAT) != 0) {
8770 		return file_create_entry_ref(device, inode, name, openMode, perms,
8771 		 false);
8772 	}
8773 
8774 	return file_open_entry_ref(device, inode, name, openMode, false);
8775 }
8776 
8777 
8778 int
8779 _user_open(int fd, const char* userPath, int openMode, int perms)
8780 {
8781 	KPath path(B_PATH_NAME_LENGTH + 1);
8782 	if (path.InitCheck() != B_OK)
8783 		return B_NO_MEMORY;
8784 
8785 	char* buffer = path.LockBuffer();
8786 
8787 	if (!IS_USER_ADDRESS(userPath)
8788 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8789 		return B_BAD_ADDRESS;
8790 
8791 	if ((openMode & O_CREAT) != 0)
8792 		return file_create(fd, buffer, openMode, perms, false);
8793 
8794 	return file_open(fd, buffer, openMode, false);
8795 }
8796 
8797 
8798 int
8799 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8800 {
8801 	if (userName != NULL) {
8802 		char name[B_FILE_NAME_LENGTH];
8803 
8804 		if (!IS_USER_ADDRESS(userName)
8805 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8806 			return B_BAD_ADDRESS;
8807 
8808 		return dir_open_entry_ref(device, inode, name, false);
8809 	}
8810 	return dir_open_entry_ref(device, inode, NULL, false);
8811 }
8812 
8813 
8814 int
8815 _user_open_dir(int fd, const char* userPath)
8816 {
8817 	if (userPath == NULL)
8818 		return dir_open(fd, NULL, false);
8819 
8820 	KPath path(B_PATH_NAME_LENGTH + 1);
8821 	if (path.InitCheck() != B_OK)
8822 		return B_NO_MEMORY;
8823 
8824 	char* buffer = path.LockBuffer();
8825 
8826 	if (!IS_USER_ADDRESS(userPath)
8827 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8828 		return B_BAD_ADDRESS;
8829 
8830 	return dir_open(fd, buffer, false);
8831 }
8832 
8833 
8834 /*!	\brief Opens a directory's parent directory and returns the entry name
8835 		   of the former.
8836 
8837 	Aside from that it returns the directory's entry name, this method is
8838 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8839 	equivalent, if \a userName is \c NULL.
8840 
8841 	If a name buffer is supplied and the name does not fit the buffer, the
8842 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8843 
8844 	\param fd A FD referring to a directory.
8845 	\param userName Buffer the directory's entry name shall be written into.
8846 		   May be \c NULL.
8847 	\param nameLength Size of the name buffer.
8848 	\return The file descriptor of the opened parent directory, if everything
8849 			went fine, an error code otherwise.
8850 */
8851 int
8852 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8853 {
8854 	bool kernel = false;
8855 
8856 	if (userName && !IS_USER_ADDRESS(userName))
8857 		return B_BAD_ADDRESS;
8858 
8859 	// open the parent dir
8860 	int parentFD = dir_open(fd, (char*)"..", kernel);
8861 	if (parentFD < 0)
8862 		return parentFD;
8863 	FDCloser fdCloser(parentFD, kernel);
8864 
8865 	if (userName) {
8866 		// get the vnodes
8867 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8868 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8869 		VNodePutter parentVNodePutter(parentVNode);
8870 		VNodePutter dirVNodePutter(dirVNode);
8871 		if (!parentVNode || !dirVNode)
8872 			return B_FILE_ERROR;
8873 
8874 		// get the vnode name
8875 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8876 		struct dirent* buffer = (struct dirent*)_buffer;
8877 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8878 			sizeof(_buffer), get_current_io_context(false));
8879 		if (status != B_OK)
8880 			return status;
8881 
8882 		// copy the name to the userland buffer
8883 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8884 		if (len < 0)
8885 			return len;
8886 		if (len >= (int)nameLength)
8887 			return B_BUFFER_OVERFLOW;
8888 	}
8889 
8890 	return fdCloser.Detach();
8891 }
8892 
8893 
8894 status_t
8895 _user_fcntl(int fd, int op, uint32 argument)
8896 {
8897 	status_t status = common_fcntl(fd, op, argument, false);
8898 	if (op == F_SETLKW)
8899 		syscall_restart_handle_post(status);
8900 
8901 	return status;
8902 }
8903 
8904 
8905 status_t
8906 _user_fsync(int fd)
8907 {
8908 	return common_sync(fd, false);
8909 }
8910 
8911 
8912 status_t
8913 _user_flock(int fd, int operation)
8914 {
8915 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8916 
8917 	// Check if the operation is valid
8918 	switch (operation & ~LOCK_NB) {
8919 		case LOCK_UN:
8920 		case LOCK_SH:
8921 		case LOCK_EX:
8922 			break;
8923 
8924 		default:
8925 			return B_BAD_VALUE;
8926 	}
8927 
8928 	struct file_descriptor* descriptor;
8929 	struct vnode* vnode;
8930 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8931 	if (descriptor == NULL)
8932 		return B_FILE_ERROR;
8933 
8934 	if (descriptor->type != FDTYPE_FILE) {
8935 		put_fd(descriptor);
8936 		return B_BAD_VALUE;
8937 	}
8938 
8939 	struct flock flock;
8940 	flock.l_start = 0;
8941 	flock.l_len = OFF_MAX;
8942 	flock.l_whence = 0;
8943 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8944 
8945 	status_t status;
8946 	if ((operation & LOCK_UN) != 0)
8947 		status = release_advisory_lock(vnode, &flock);
8948 	else {
8949 		status = acquire_advisory_lock(vnode,
8950 			thread_get_current_thread()->team->session_id, &flock,
8951 			(operation & LOCK_NB) == 0);
8952 	}
8953 
8954 	syscall_restart_handle_post(status);
8955 
8956 	put_fd(descriptor);
8957 	return status;
8958 }
8959 
8960 
8961 status_t
8962 _user_lock_node(int fd)
8963 {
8964 	return common_lock_node(fd, false);
8965 }
8966 
8967 
8968 status_t
8969 _user_unlock_node(int fd)
8970 {
8971 	return common_unlock_node(fd, false);
8972 }
8973 
8974 
8975 status_t
8976 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8977 	int perms)
8978 {
8979 	char name[B_FILE_NAME_LENGTH];
8980 	status_t status;
8981 
8982 	if (!IS_USER_ADDRESS(userName))
8983 		return B_BAD_ADDRESS;
8984 
8985 	status = user_strlcpy(name, userName, sizeof(name));
8986 	if (status < 0)
8987 		return status;
8988 
8989 	return dir_create_entry_ref(device, inode, name, perms, false);
8990 }
8991 
8992 
8993 status_t
8994 _user_create_dir(int fd, const char* userPath, int perms)
8995 {
8996 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8997 	if (pathBuffer.InitCheck() != B_OK)
8998 		return B_NO_MEMORY;
8999 
9000 	char* path = pathBuffer.LockBuffer();
9001 
9002 	if (!IS_USER_ADDRESS(userPath)
9003 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9004 		return B_BAD_ADDRESS;
9005 
9006 	return dir_create(fd, path, perms, false);
9007 }
9008 
9009 
9010 status_t
9011 _user_remove_dir(int fd, const char* userPath)
9012 {
9013 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9014 	if (pathBuffer.InitCheck() != B_OK)
9015 		return B_NO_MEMORY;
9016 
9017 	char* path = pathBuffer.LockBuffer();
9018 
9019 	if (userPath != NULL) {
9020 		if (!IS_USER_ADDRESS(userPath)
9021 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9022 			return B_BAD_ADDRESS;
9023 	}
9024 
9025 	return dir_remove(fd, userPath ? path : NULL, false);
9026 }
9027 
9028 
9029 status_t
9030 _user_read_link(int fd, const char* userPath, char* userBuffer,
9031 	size_t* userBufferSize)
9032 {
9033 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9034 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9035 		return B_NO_MEMORY;
9036 
9037 	size_t bufferSize;
9038 
9039 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9040 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9041 		return B_BAD_ADDRESS;
9042 
9043 	char* path = pathBuffer.LockBuffer();
9044 	char* buffer = linkBuffer.LockBuffer();
9045 
9046 	if (userPath) {
9047 		if (!IS_USER_ADDRESS(userPath)
9048 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9049 			return B_BAD_ADDRESS;
9050 
9051 		if (bufferSize > B_PATH_NAME_LENGTH)
9052 			bufferSize = B_PATH_NAME_LENGTH;
9053 	}
9054 
9055 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9056 		&bufferSize, false);
9057 
9058 	// we also update the bufferSize in case of errors
9059 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9060 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9061 		return B_BAD_ADDRESS;
9062 
9063 	if (status != B_OK)
9064 		return status;
9065 
9066 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9067 		return B_BAD_ADDRESS;
9068 
9069 	return B_OK;
9070 }
9071 
9072 
9073 status_t
9074 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9075 	int mode)
9076 {
9077 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9078 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9079 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9080 		return B_NO_MEMORY;
9081 
9082 	char* path = pathBuffer.LockBuffer();
9083 	char* toPath = toPathBuffer.LockBuffer();
9084 
9085 	if (!IS_USER_ADDRESS(userPath)
9086 		|| !IS_USER_ADDRESS(userToPath)
9087 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9088 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9089 		return B_BAD_ADDRESS;
9090 
9091 	return common_create_symlink(fd, path, toPath, mode, false);
9092 }
9093 
9094 
9095 status_t
9096 _user_create_link(int pathFD, const char* userPath, int toFD,
9097 	const char* userToPath, bool traverseLeafLink)
9098 {
9099 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9100 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9101 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9102 		return B_NO_MEMORY;
9103 
9104 	char* path = pathBuffer.LockBuffer();
9105 	char* toPath = toPathBuffer.LockBuffer();
9106 
9107 	if (!IS_USER_ADDRESS(userPath)
9108 		|| !IS_USER_ADDRESS(userToPath)
9109 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9110 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9111 		return B_BAD_ADDRESS;
9112 
9113 	status_t status = check_path(toPath);
9114 	if (status != B_OK)
9115 		return status;
9116 
9117 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9118 		false);
9119 }
9120 
9121 
9122 status_t
9123 _user_unlink(int fd, const char* userPath)
9124 {
9125 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9126 	if (pathBuffer.InitCheck() != B_OK)
9127 		return B_NO_MEMORY;
9128 
9129 	char* path = pathBuffer.LockBuffer();
9130 
9131 	if (!IS_USER_ADDRESS(userPath)
9132 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9133 		return B_BAD_ADDRESS;
9134 
9135 	return common_unlink(fd, path, false);
9136 }
9137 
9138 
9139 status_t
9140 _user_rename(int oldFD, const char* userOldPath, int newFD,
9141 	const char* userNewPath)
9142 {
9143 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9144 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9145 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9146 		return B_NO_MEMORY;
9147 
9148 	char* oldPath = oldPathBuffer.LockBuffer();
9149 	char* newPath = newPathBuffer.LockBuffer();
9150 
9151 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9152 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9153 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9154 		return B_BAD_ADDRESS;
9155 
9156 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9157 }
9158 
9159 
9160 status_t
9161 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9162 {
9163 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9164 	if (pathBuffer.InitCheck() != B_OK)
9165 		return B_NO_MEMORY;
9166 
9167 	char* path = pathBuffer.LockBuffer();
9168 
9169 	if (!IS_USER_ADDRESS(userPath)
9170 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9171 		return B_BAD_ADDRESS;
9172 	}
9173 
9174 	// split into directory vnode and filename path
9175 	char filename[B_FILE_NAME_LENGTH];
9176 	struct vnode* dir;
9177 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9178 	if (status != B_OK)
9179 		return status;
9180 
9181 	VNodePutter _(dir);
9182 
9183 	// the underlying FS needs to support creating FIFOs
9184 	if (!HAS_FS_CALL(dir, create_special_node))
9185 		return B_UNSUPPORTED;
9186 
9187 	// create the entry	-- the FIFO sub node is set up automatically
9188 	fs_vnode superVnode;
9189 	ino_t nodeID;
9190 	status = FS_CALL(dir, create_special_node, filename, NULL,
9191 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9192 
9193 	// create_special_node() acquired a reference for us that we don't need.
9194 	if (status == B_OK)
9195 		put_vnode(dir->mount->volume, nodeID);
9196 
9197 	return status;
9198 }
9199 
9200 
9201 status_t
9202 _user_create_pipe(int* userFDs)
9203 {
9204 	// rootfs should support creating FIFOs, but let's be sure
9205 	if (!HAS_FS_CALL(sRoot, create_special_node))
9206 		return B_UNSUPPORTED;
9207 
9208 	// create the node	-- the FIFO sub node is set up automatically
9209 	fs_vnode superVnode;
9210 	ino_t nodeID;
9211 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9212 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9213 	if (status != B_OK)
9214 		return status;
9215 
9216 	// We've got one reference to the node and need another one.
9217 	struct vnode* vnode;
9218 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9219 	if (status != B_OK) {
9220 		// that should not happen
9221 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9222 			sRoot->mount->id, sRoot->id);
9223 		return status;
9224 	}
9225 
9226 	// Everything looks good so far. Open two FDs for reading respectively
9227 	// writing.
9228 	int fds[2];
9229 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9230 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9231 
9232 	FDCloser closer0(fds[0], false);
9233 	FDCloser closer1(fds[1], false);
9234 
9235 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9236 
9237 	// copy FDs to userland
9238 	if (status == B_OK) {
9239 		if (!IS_USER_ADDRESS(userFDs)
9240 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9241 			status = B_BAD_ADDRESS;
9242 		}
9243 	}
9244 
9245 	// keep FDs, if everything went fine
9246 	if (status == B_OK) {
9247 		closer0.Detach();
9248 		closer1.Detach();
9249 	}
9250 
9251 	return status;
9252 }
9253 
9254 
9255 status_t
9256 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9257 {
9258 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9259 	if (pathBuffer.InitCheck() != B_OK)
9260 		return B_NO_MEMORY;
9261 
9262 	char* path = pathBuffer.LockBuffer();
9263 
9264 	if (!IS_USER_ADDRESS(userPath)
9265 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9266 		return B_BAD_ADDRESS;
9267 
9268 	return common_access(fd, path, mode, effectiveUserGroup, false);
9269 }
9270 
9271 
9272 status_t
9273 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9274 	struct stat* userStat, size_t statSize)
9275 {
9276 	struct stat stat;
9277 	status_t status;
9278 
9279 	if (statSize > sizeof(struct stat))
9280 		return B_BAD_VALUE;
9281 
9282 	if (!IS_USER_ADDRESS(userStat))
9283 		return B_BAD_ADDRESS;
9284 
9285 	if (userPath) {
9286 		// path given: get the stat of the node referred to by (fd, path)
9287 		if (!IS_USER_ADDRESS(userPath))
9288 			return B_BAD_ADDRESS;
9289 
9290 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9291 		if (pathBuffer.InitCheck() != B_OK)
9292 			return B_NO_MEMORY;
9293 
9294 		char* path = pathBuffer.LockBuffer();
9295 
9296 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9297 		if (length < B_OK)
9298 			return length;
9299 		if (length >= B_PATH_NAME_LENGTH)
9300 			return B_NAME_TOO_LONG;
9301 
9302 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9303 	} else {
9304 		// no path given: get the FD and use the FD operation
9305 		struct file_descriptor* descriptor
9306 			= get_fd(get_current_io_context(false), fd);
9307 		if (descriptor == NULL)
9308 			return B_FILE_ERROR;
9309 
9310 		if (descriptor->ops->fd_read_stat)
9311 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9312 		else
9313 			status = B_UNSUPPORTED;
9314 
9315 		put_fd(descriptor);
9316 	}
9317 
9318 	if (status != B_OK)
9319 		return status;
9320 
9321 	return user_memcpy(userStat, &stat, statSize);
9322 }
9323 
9324 
9325 status_t
9326 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9327 	const struct stat* userStat, size_t statSize, int statMask)
9328 {
9329 	if (statSize > sizeof(struct stat))
9330 		return B_BAD_VALUE;
9331 
9332 	struct stat stat;
9333 
9334 	if (!IS_USER_ADDRESS(userStat)
9335 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9336 		return B_BAD_ADDRESS;
9337 
9338 	// clear additional stat fields
9339 	if (statSize < sizeof(struct stat))
9340 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9341 
9342 	status_t status;
9343 
9344 	if (userPath) {
9345 		// path given: write the stat of the node referred to by (fd, path)
9346 		if (!IS_USER_ADDRESS(userPath))
9347 			return B_BAD_ADDRESS;
9348 
9349 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9350 		if (pathBuffer.InitCheck() != B_OK)
9351 			return B_NO_MEMORY;
9352 
9353 		char* path = pathBuffer.LockBuffer();
9354 
9355 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9356 		if (length < B_OK)
9357 			return length;
9358 		if (length >= B_PATH_NAME_LENGTH)
9359 			return B_NAME_TOO_LONG;
9360 
9361 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9362 			statMask, false);
9363 	} else {
9364 		// no path given: get the FD and use the FD operation
9365 		struct file_descriptor* descriptor
9366 			= get_fd(get_current_io_context(false), fd);
9367 		if (descriptor == NULL)
9368 			return B_FILE_ERROR;
9369 
9370 		if (descriptor->ops->fd_write_stat) {
9371 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9372 				statMask);
9373 		} else
9374 			status = B_UNSUPPORTED;
9375 
9376 		put_fd(descriptor);
9377 	}
9378 
9379 	return status;
9380 }
9381 
9382 
9383 int
9384 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9385 {
9386 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9387 	if (pathBuffer.InitCheck() != B_OK)
9388 		return B_NO_MEMORY;
9389 
9390 	char* path = pathBuffer.LockBuffer();
9391 
9392 	if (userPath != NULL) {
9393 		if (!IS_USER_ADDRESS(userPath)
9394 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9395 			return B_BAD_ADDRESS;
9396 	}
9397 
9398 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9399 }
9400 
9401 
9402 ssize_t
9403 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9404 	size_t readBytes)
9405 {
9406 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9407 	if (attr < 0)
9408 		return attr;
9409 
9410 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9411 	_user_close(attr);
9412 
9413 	return bytes;
9414 }
9415 
9416 
9417 ssize_t
9418 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9419 	const void* buffer, size_t writeBytes)
9420 {
9421 	// Try to support the BeOS typical truncation as well as the position
9422 	// argument
9423 	int attr = attr_create(fd, NULL, attribute, type,
9424 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9425 	if (attr < 0)
9426 		return attr;
9427 
9428 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9429 	_user_close(attr);
9430 
9431 	return bytes;
9432 }
9433 
9434 
9435 status_t
9436 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9437 {
9438 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9439 	if (attr < 0)
9440 		return attr;
9441 
9442 	struct file_descriptor* descriptor
9443 		= get_fd(get_current_io_context(false), attr);
9444 	if (descriptor == NULL) {
9445 		_user_close(attr);
9446 		return B_FILE_ERROR;
9447 	}
9448 
9449 	struct stat stat;
9450 	status_t status;
9451 	if (descriptor->ops->fd_read_stat)
9452 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9453 	else
9454 		status = B_UNSUPPORTED;
9455 
9456 	put_fd(descriptor);
9457 	_user_close(attr);
9458 
9459 	if (status == B_OK) {
9460 		attr_info info;
9461 		info.type = stat.st_type;
9462 		info.size = stat.st_size;
9463 
9464 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9465 			return B_BAD_ADDRESS;
9466 	}
9467 
9468 	return status;
9469 }
9470 
9471 
9472 int
9473 _user_open_attr(int fd, const char* userPath, const char* userName,
9474 	uint32 type, int openMode)
9475 {
9476 	char name[B_FILE_NAME_LENGTH];
9477 
9478 	if (!IS_USER_ADDRESS(userName)
9479 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9480 		return B_BAD_ADDRESS;
9481 
9482 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9483 	if (pathBuffer.InitCheck() != B_OK)
9484 		return B_NO_MEMORY;
9485 
9486 	char* path = pathBuffer.LockBuffer();
9487 
9488 	if (userPath != NULL) {
9489 		if (!IS_USER_ADDRESS(userPath)
9490 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9491 			return B_BAD_ADDRESS;
9492 	}
9493 
9494 	if ((openMode & O_CREAT) != 0) {
9495 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9496 			false);
9497 	}
9498 
9499 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9500 }
9501 
9502 
9503 status_t
9504 _user_remove_attr(int fd, const char* userName)
9505 {
9506 	char name[B_FILE_NAME_LENGTH];
9507 
9508 	if (!IS_USER_ADDRESS(userName)
9509 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9510 		return B_BAD_ADDRESS;
9511 
9512 	return attr_remove(fd, name, false);
9513 }
9514 
9515 
9516 status_t
9517 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9518 	const char* userToName)
9519 {
9520 	if (!IS_USER_ADDRESS(userFromName)
9521 		|| !IS_USER_ADDRESS(userToName))
9522 		return B_BAD_ADDRESS;
9523 
9524 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9525 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9526 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9527 		return B_NO_MEMORY;
9528 
9529 	char* fromName = fromNameBuffer.LockBuffer();
9530 	char* toName = toNameBuffer.LockBuffer();
9531 
9532 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9533 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9534 		return B_BAD_ADDRESS;
9535 
9536 	return attr_rename(fromFile, fromName, toFile, toName, false);
9537 }
9538 
9539 
9540 int
9541 _user_open_index_dir(dev_t device)
9542 {
9543 	return index_dir_open(device, false);
9544 }
9545 
9546 
9547 status_t
9548 _user_create_index(dev_t device, const char* userName, uint32 type,
9549 	uint32 flags)
9550 {
9551 	char name[B_FILE_NAME_LENGTH];
9552 
9553 	if (!IS_USER_ADDRESS(userName)
9554 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9555 		return B_BAD_ADDRESS;
9556 
9557 	return index_create(device, name, type, flags, false);
9558 }
9559 
9560 
9561 status_t
9562 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9563 {
9564 	char name[B_FILE_NAME_LENGTH];
9565 	struct stat stat;
9566 	status_t status;
9567 
9568 	if (!IS_USER_ADDRESS(userName)
9569 		|| !IS_USER_ADDRESS(userStat)
9570 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9571 		return B_BAD_ADDRESS;
9572 
9573 	status = index_name_read_stat(device, name, &stat, false);
9574 	if (status == B_OK) {
9575 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9576 			return B_BAD_ADDRESS;
9577 	}
9578 
9579 	return status;
9580 }
9581 
9582 
9583 status_t
9584 _user_remove_index(dev_t device, const char* userName)
9585 {
9586 	char name[B_FILE_NAME_LENGTH];
9587 
9588 	if (!IS_USER_ADDRESS(userName)
9589 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9590 		return B_BAD_ADDRESS;
9591 
9592 	return index_remove(device, name, false);
9593 }
9594 
9595 
9596 status_t
9597 _user_getcwd(char* userBuffer, size_t size)
9598 {
9599 	if (size == 0)
9600 		return B_BAD_VALUE;
9601 	if (!IS_USER_ADDRESS(userBuffer))
9602 		return B_BAD_ADDRESS;
9603 
9604 	if (size > kMaxPathLength)
9605 		size = kMaxPathLength;
9606 
9607 	KPath pathBuffer(size);
9608 	if (pathBuffer.InitCheck() != B_OK)
9609 		return B_NO_MEMORY;
9610 
9611 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9612 
9613 	char* path = pathBuffer.LockBuffer();
9614 
9615 	status_t status = get_cwd(path, size, false);
9616 	if (status != B_OK)
9617 		return status;
9618 
9619 	// Copy back the result
9620 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9621 		return B_BAD_ADDRESS;
9622 
9623 	return status;
9624 }
9625 
9626 
9627 status_t
9628 _user_setcwd(int fd, const char* userPath)
9629 {
9630 	TRACE(("user_setcwd: path = %p\n", userPath));
9631 
9632 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9633 	if (pathBuffer.InitCheck() != B_OK)
9634 		return B_NO_MEMORY;
9635 
9636 	char* path = pathBuffer.LockBuffer();
9637 
9638 	if (userPath != NULL) {
9639 		if (!IS_USER_ADDRESS(userPath)
9640 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9641 			return B_BAD_ADDRESS;
9642 	}
9643 
9644 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9645 }
9646 
9647 
9648 status_t
9649 _user_change_root(const char* userPath)
9650 {
9651 	// only root is allowed to chroot()
9652 	if (geteuid() != 0)
9653 		return B_NOT_ALLOWED;
9654 
9655 	// alloc path buffer
9656 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9657 	if (pathBuffer.InitCheck() != B_OK)
9658 		return B_NO_MEMORY;
9659 
9660 	// copy userland path to kernel
9661 	char* path = pathBuffer.LockBuffer();
9662 	if (userPath != NULL) {
9663 		if (!IS_USER_ADDRESS(userPath)
9664 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9665 			return B_BAD_ADDRESS;
9666 	}
9667 
9668 	// get the vnode
9669 	struct vnode* vnode;
9670 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9671 	if (status != B_OK)
9672 		return status;
9673 
9674 	// set the new root
9675 	struct io_context* context = get_current_io_context(false);
9676 	mutex_lock(&sIOContextRootLock);
9677 	struct vnode* oldRoot = context->root;
9678 	context->root = vnode;
9679 	mutex_unlock(&sIOContextRootLock);
9680 
9681 	put_vnode(oldRoot);
9682 
9683 	return B_OK;
9684 }
9685 
9686 
9687 int
9688 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9689 	uint32 flags, port_id port, int32 token)
9690 {
9691 	char* query;
9692 
9693 	if (device < 0 || userQuery == NULL || queryLength == 0)
9694 		return B_BAD_VALUE;
9695 
9696 	// this is a safety restriction
9697 	if (queryLength >= 65536)
9698 		return B_NAME_TOO_LONG;
9699 
9700 	query = (char*)malloc(queryLength + 1);
9701 	if (query == NULL)
9702 		return B_NO_MEMORY;
9703 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9704 		free(query);
9705 		return B_BAD_ADDRESS;
9706 	}
9707 
9708 	int fd = query_open(device, query, flags, port, token, false);
9709 
9710 	free(query);
9711 	return fd;
9712 }
9713 
9714 
9715 #include "vfs_request_io.cpp"
9716