/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <NodeMonitor.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <StackOrHeapArray.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
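
// Illustrative sketch (not compiled; the "read" hook and the helper name are
// examples only): FS_CALL() dispatches through the vnode's ops table, so the
// call below expands to vnode->ops->read(vnode->mount->volume, vnode, cookie,
// pos, buffer, _length) -- plus the NULL-hook panic check under KDEBUG.
#if 0
static status_t
example_dispatch_read(struct vnode* vnode, void* cookie, off_t pos,
	void* buffer, size_t* _length)
{
	if (!HAS_FS_CALL(vnode, read))
		return B_UNSUPPORTED;
	return FS_CALL(vnode, read, cookie, pos, buffer, _length);
}
#endif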


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd()); this does not
	// depend on PATH_MAX.


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};
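
// Sketch (hypothetical helper, not part of the original file): per the note
// above, while a reference to any vnode of a mount is held, following
// vnode->mount->root_vnode->covers is safe without extra locking; it is NULL
// only for the root mount.
#if 0
static dev_t
example_covered_device(struct vnode* vnode)
{
	struct vnode* covers = vnode->mount->root_vnode->covers;
	return covers != NULL ? covers->mount->id : -1;
}
#endif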


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks, though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
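
// Sketch (assumed usage, mirroring the contract documented above): only get or
// set io_context::root, plus inc_vnode_ref_count() on it, while holding the
// mutex.
#if 0
static struct vnode*
example_get_root(struct io_context* context)
{
	MutexLocker _(sIOContextRootLock);
	struct vnode* root = context->root;
	if (root != NULL)
		inc_vnode_ref_count(root);
	return root;
}
#endif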


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->hash_next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


object_cache* sPathNameCache;
object_cache* sVnodeCache;
object_cache* sFileDescriptorCache;

#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes (10s)
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static ssize_t file_readv(struct file_descriptor* descriptor, off_t pos,
	const struct iovec *vecs, int count);
static ssize_t file_writev(struct file_descriptor* descriptor, off_t pos,
	const struct iovec *vecs, int count);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, bool kernel,
	VnodePutter& _vnode, ino_t* _parentID, char* leafName = NULL);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	VnodePutter& _vnode, ino_t* _parentID, bool kernel);
static int32 inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_close,
	file_free_fd,
	file_read,
	file_write,
	file_readv,
	file_writev,
	file_seek,
	common_ioctl,
	NULL,		// set_flags()
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
};

static struct fd_ops sDirectoryOps = {
	dir_close,
	dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
};

static struct fd_ops sAttributeDirectoryOps = {
	attr_dir_close,
	attr_dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
};

static struct fd_ops sAttributeOps = {
	attr_close,
	attr_free_fd,
	attr_read,
	attr_write,
	NULL,		// readv()
	NULL,		// writev()
	attr_seek,
	common_ioctl,
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
};

static struct fd_ops sIndexDirectoryOps = {
	index_dir_close,
	index_dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// dir_close()
	NULL,		// free_fd()
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
};
#endif

static struct fd_ops sQueryOps = {
	query_close,
	query_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
};


namespace {

class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};

} // namespace
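
// Sketch (hypothetical usage; the path and the _kern_open() call are
// assumptions): FDCloser ties an open descriptor to a scope, closing it on
// every early return; Detach() hands ownership back on the success path.
#if 0
static int
example_open_and_detach()
{
	int fd = _kern_open(-1, "/tmp/example", O_RDONLY, 0);
	if (fd < 0)
		return fd;

	FDCloser fdCloser(fd, true);
	// ... any failure return here closes the descriptor automatically ...
	return fdCloser.Detach();
		// success -- the caller now owns the descriptor
}
#endif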


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountLock lock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	ReadLocker mountLocker(sMountLock);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
			|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
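
// For example, per the contract above: both get_file_system_name("bfs") and
// get_file_system_name("file_systems/bfs/v1") return a malloc()'d "bfs".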


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
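
// For example: given fsNames "bfs:write_overlay", layer 0 yields "bfs",
// layer 1 yields "write_overlay", and layer 2 yields NULL.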


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);

	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}


/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning if one should
	still wait for the vnode becoming unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}
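
// Sketch (assumed caller pattern, as in get_vnode() below;
// vnode_is_busy() stands in for the caller's actual busy check): a full retry
// budget waits at most BUSY_VNODE_RETRIES * BUSY_VNODE_DELAY
// = 2000 * 5000 us = 10 s, matching the comment at the definitions above.
#if 0
int32 tries = BUSY_VNODE_RETRIES;
while (vnode_is_busy(mountID, vnodeID)) {
	if (!retry_busy_vnode(tries, mountID, vnodeID))
		return B_BUSY;	// gave up after ~10 s
}
#endif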


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		object_cache_free(sVnodeCache, vnode, 0);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	rw_lock_read_lock(&sMountLock);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		rw_lock_read_unlock(&sMountLock);
		rw_lock_write_unlock(&sVnodeLock);
		object_cache_free(sVnodeCache, vnode, 0);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	rw_lock_read_unlock(&sMountLock);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}
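
// Sketch (assumed caller pattern, as in get_vnode() below): on B_OK the
// function returns with sVnodeLock write-locked, so the caller must unlock it
// after inspecting or publishing the new (busy) vnode.
#if 0
bool nodeCreated;
struct vnode* vnode;
if (create_new_vnode_and_lock(mountID, vnodeID, vnode, nodeCreated) == B_OK) {
	// ... set up the vnode while it is still busy ...
	rw_lock_write_unlock(&sVnodeLock);
}
#endif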


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now will cause the next-to-last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To ensure that will be ignored, and that no
	// other consumers will acquire this vnode in the meantime, we make the
	// vnode's ref count negative.
	vnode->ref_count = -1;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	object_cache_free(sVnodeCache, vnode, 0);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	const int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
	\returns the old reference count.
*/
static int32
inc_vnode_ref_count(struct vnode* vnode)
{
	const int32 oldCount = atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		oldCount + 1));
	ASSERT(oldCount >= 0);
	return oldCount;
}
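
// Sketch (assumed pattern for the second case described above, as used in
// get_vnode() below): when taking a reference while holding the vnode's lock,
// a returned old count of 0 means the node sat on the unused list and must be
// marked used again.
#if 0
if (inc_vnode_ref_count(vnode) == 0)
	vnode_used(vnode);
#endif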


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountLock.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);

	if (vnode != NULL && !vnode->IsBusy()) {
		// Try to increment the vnode's reference count without locking.
		// (We can't use atomic_add here, as if the vnode is unused,
		// we need to hold its lock to mark it used again.)
		const int32 oldRefCount = atomic_get(&vnode->ref_count);
		if (oldRefCount > 0 && atomic_test_and_set(&vnode->ref_count,
				oldRefCount + 1, oldRefCount) == oldRefCount) {
			rw_lock_read_unlock(&sVnodeLock);
			*_vnode = vnode;
			return B_OK;
		}
	}

	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode != NULL && vnode->IsBusy()) {
		// vnodes in the Removed state (except ones still Unpublished)
		// which are also Busy will disappear soon, so we do not wait for them.
		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	if (vnode != NULL) {
		if (inc_vnode_ref_count(vnode) == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status_t status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type = 0;
		uint32 flags = 0;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && (vnode->private_node == NULL || vnode->ops == NULL)) {
			KDEBUG_ONLY(panic("filesystem get_vnode returned 0 with unset fields"));
			status = B_BAD_VALUE;
		}

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			object_cache_free(sVnodeCache, vnode, 0);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		ReadLocker hotVnodesReadLocker(sHotVnodesLock);
		InterruptsSpinLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);
		ReadLocker hotVnodesReadLocker(sHotVnodesLock);

		// get the first node
		InterruptsSpinLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = sUnusedVnodeList.First();
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != sUnusedVnodeList.First())
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		hotVnodesReadLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
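
// Sketch (assumed usage, as in test_advisory_lock() below): the returned
// object stays locked until put_advisory_locking() releases it.
#if 0
struct advisory_locking* locking = get_advisory_locking(vnode);
if (locking != NULL) {
	// ... inspect or modify locking->locks ...
	put_advisory_locking(locking);
}
#endif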


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success -- also if the vnode got such an
	object from someone else in the meantime; you'll still get it
	locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*!	Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
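
// Worked example: an advisory_lock spanning [100, 199] (start = 100,
// end = 199) intersects a flock with l_start = 150, l_len = 100 (i.e.
// [150, 249]), since 100 <= 249 and 199 >= 150; a flock starting at
// l_start = 200 does not, since 199 >= 200 fails.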
1595 
1596 
1597 /*!	Tests whether acquiring a lock would block.
1598 */
1599 static status_t
test_advisory_lock(struct vnode * vnode,struct flock * flock)1600 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1601 {
1602 	flock->l_type = F_UNLCK;
1603 
1604 	struct advisory_locking* locking = get_advisory_locking(vnode);
1605 	if (locking == NULL)
1606 		return B_OK;
1607 
1608 	team_id team = team_get_current_team_id();
1609 
1610 	LockList::Iterator iterator = locking->locks.GetIterator();
1611 	while (iterator.HasNext()) {
1612 		struct advisory_lock* lock = iterator.Next();
1613 
1614 		 if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1615 			// locks do overlap
1616 			if (flock->l_type != F_RDLCK || !lock->shared) {
1617 				// collision
1618 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1619 				flock->l_whence = SEEK_SET;
1620 				flock->l_start = lock->start;
1621 				flock->l_len = lock->end - lock->start + 1;
1622 				flock->l_pid = lock->team;
1623 				break;
1624 			}
1625 		}
1626 	}
1627 
1628 	put_advisory_locking(locking);
1629 	return B_OK;
1630 }
1631 
1632 
1633 /*!	Removes the specified lock, or all locks of the calling team
1634 	if \a flock is NULL.
1635 */
1636 static status_t
release_advisory_lock(struct vnode * vnode,struct io_context * context,struct file_descriptor * descriptor,struct flock * flock)1637 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1638 	struct file_descriptor* descriptor, struct flock* flock)
1639 {
1640 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1641 
1642 	struct advisory_locking* locking = get_advisory_locking(vnode);
1643 	if (locking == NULL)
1644 		return B_OK;
1645 
1646 	// find matching lock entries
1647 
1648 	LockList::Iterator iterator = locking->locks.GetIterator();
1649 	while (iterator.HasNext()) {
1650 		struct advisory_lock* lock = iterator.Next();
1651 		bool removeLock = false;
1652 
1653 		if (descriptor != NULL && lock->bound_to == descriptor) {
1654 			// Remove flock() locks
1655 			removeLock = true;
1656 		} else if (lock->bound_to == context
1657 				&& advisory_lock_intersects(lock, flock)) {
1658 			// Remove POSIX locks
1659 			bool endsBeyond = false;
1660 			bool startsBefore = false;
1661 			if (flock != NULL) {
1662 				startsBefore = lock->start < flock->l_start;
1663 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1664 			}
1665 
1666 			if (!startsBefore && !endsBeyond) {
1667 				// lock is completely contained in flock
1668 				removeLock = true;
1669 			} else if (startsBefore && !endsBeyond) {
1670 				// cut the end of the lock
1671 				lock->end = flock->l_start - 1;
1672 			} else if (!startsBefore && endsBeyond) {
1673 				// cut the start of the lock
1674 				lock->start = flock->l_start + flock->l_len;
1675 			} else {
1676 				// divide the lock into two locks
1677 				struct advisory_lock* secondLock = new advisory_lock;
1678 				if (secondLock == NULL) {
1679 					// TODO: we should probably revert the locks we already
1680 					// changed... (ie. allocate upfront)
1681 					put_advisory_locking(locking);
1682 					return B_NO_MEMORY;
1683 				}
1684 
1685 				lock->end = flock->l_start - 1;
1686 
1687 				secondLock->bound_to = context;
1688 				secondLock->team = lock->team;
1689 				secondLock->session = lock->session;
1690 				// values must already be normalized when getting here
1691 				secondLock->start = flock->l_start + flock->l_len;
1692 				secondLock->end = lock->end;
1693 				secondLock->shared = lock->shared;
1694 
1695 				locking->locks.Add(secondLock);
1696 			}
1697 		}
1698 
1699 		if (removeLock) {
1700 			// this lock is no longer used
1701 			iterator.Remove();
1702 			delete lock;
1703 		}
1704 	}
1705 
1706 	bool removeLocking = locking->locks.IsEmpty();
1707 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1708 
1709 	put_advisory_locking(locking);
1710 
1711 	if (removeLocking) {
1712 		// We can remove the whole advisory locking structure; it's no
1713 		// longer used
1714 		locking = get_advisory_locking(vnode);
1715 		if (locking != NULL) {
1716 			ReadLocker locker(sVnodeLock);
1717 			AutoLocker<Vnode> nodeLocker(vnode);
1718 
1719 			// the locking could have been changed in the mean time
1720 			if (locking->locks.IsEmpty()) {
1721 				vnode->advisory_locking = NULL;
1722 				nodeLocker.Unlock();
1723 				locker.Unlock();
1724 
1725 				// we've detached the locking from the vnode, so we can
1726 				// safely delete it
1727 				delete locking;
1728 			} else {
1729 				// the locking is in use again
1730 				nodeLocker.Unlock();
1731 				locker.Unlock();
1732 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1733 			}
1734 		}
1735 	}
1736 
1737 	return B_OK;
1738 }
1739 
1740 
1741 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1742 	will wait for the lock to become available in case of collisions; if
1743 	\a wait is \c false, it returns B_PERMISSION_DENIED (or B_WOULD_BLOCK) instead.
1744 
1745 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1746 	BSD flock() semantics are used, that is, all children can unlock the file
1747 	in question (we even allow parents to remove the lock, which seems to be
1748 	in line with what the BSDs do).
1749 */
1750 static status_t
1751 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1752 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1753 {
1754 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1755 		vnode, flock, wait ? "yes" : "no"));
1756 
1757 	bool shared = flock->l_type == F_RDLCK;
1758 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1759 	status_t status = B_OK;
1760 
1761 	// TODO: do deadlock detection!
1762 
1763 	struct advisory_locking* locking;
1764 
1765 	while (true) {
1766 		// if this vnode has an advisory_locking structure attached,
1767 		// lock that one and search for any colliding file lock
1768 		status = create_advisory_locking(vnode);
1769 		if (status != B_OK)
1770 			return status;
1771 
1772 		locking = vnode->advisory_locking;
1773 		team_id team = team_get_current_team_id();
1774 		sem_id waitForLock = -1;
1775 
1776 		// test for collisions
1777 		LockList::Iterator iterator = locking->locks.GetIterator();
1778 		while (iterator.HasNext()) {
1779 			struct advisory_lock* lock = iterator.Next();
1780 
1781 			// TODO: locks from the same team might be joinable!
1782 			if ((lock->team != team || lock->bound_to != boundTo)
1783 					&& advisory_lock_intersects(lock, flock)) {
1784 				// locks do overlap
1785 				if (!shared || !lock->shared) {
1786 					// we need to wait
1787 					waitForLock = locking->wait_sem;
1788 					break;
1789 				}
1790 			}
1791 		}
1792 
1793 		if (waitForLock < 0)
1794 			break;
1795 
1796 		// We need to wait. Do that or fail now, if we've been asked not to.
1797 
1798 		if (!wait) {
1799 			put_advisory_locking(locking);
1800 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1801 		}
1802 
1803 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1804 			B_CAN_INTERRUPT, 0);
1805 		if (status != B_OK && status != B_BAD_SEM_ID)
1806 			return status;
1807 
1808 		// We have been notified, but we need to re-lock the locking object. So
1809 		// go another round...
1810 	}
1811 
1812 	// install new lock
1813 
1814 	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
1815 	if (lock == NULL) {
1816 		put_advisory_locking(locking);
1817 		return B_NO_MEMORY;
1818 	}
1819 
1820 	lock->bound_to = boundTo;
1821 	lock->team = team_get_current_team_id();
1822 	lock->session = thread_get_current_thread()->team->session_id;
1823 	// values must already be normalized when getting here
1824 	lock->start = flock->l_start;
1825 	lock->end = flock->l_start - 1 + flock->l_len;
1826 	lock->shared = shared;
1827 
1828 	locking->locks.Add(lock);
1829 	put_advisory_locking(locking);
1830 
1831 	return status;
1832 }
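/*	Editor's illustrative sketch (not part of the original source): the
	expected userland route into acquire_advisory_lock(). The flock is
	normalized by normalize_flock() (see below) before getting here.

		struct flock flock;
		flock.l_type = F_WRLCK;		// exclusive, so shared == false
		flock.l_whence = SEEK_SET;
		flock.l_start = 0;
		flock.l_len = 4096;		// lock the first 4096 bytes
		// fcntl(fd, F_SETLKW, &flock) corresponds to wait == true: the
		// caller blocks on wait_sem while a conflicting lock of another
		// owner intersects the requested range.
*/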
1833 
1834 
1835 /*!	Normalizes the \a flock structure to make it easier to compare the
1836 	structure with others. The l_start and l_len fields are set to absolute
1837 	values according to the l_whence field.
1838 */
1839 static status_t
1840 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1841 {
1842 	switch (flock->l_whence) {
1843 		case SEEK_SET:
1844 			break;
1845 		case SEEK_CUR:
1846 			flock->l_start += descriptor->pos;
1847 			break;
1848 		case SEEK_END:
1849 		{
1850 			struct vnode* vnode = descriptor->u.vnode;
1851 			struct stat stat;
1852 			status_t status;
1853 
1854 			if (!HAS_FS_CALL(vnode, read_stat))
1855 				return B_UNSUPPORTED;
1856 
1857 			status = FS_CALL(vnode, read_stat, &stat);
1858 			if (status != B_OK)
1859 				return status;
1860 
1861 			flock->l_start += stat.st_size;
1862 			break;
1863 		}
1864 		default:
1865 			return B_BAD_VALUE;
1866 	}
1867 
1868 	if (flock->l_start < 0)
1869 		flock->l_start = 0;
1870 	if (flock->l_len == 0)
1871 		flock->l_len = OFF_MAX;
1872 
1873 	// don't let the offset and length overflow
1874 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1875 		flock->l_len = OFF_MAX - flock->l_start;
1876 
1877 	if (flock->l_len < 0) {
1878 		// a negative length reverses the region
1879 		flock->l_start += flock->l_len;
1880 		flock->l_len = -flock->l_len;
1881 	}
1882 
1883 	return B_OK;
1884 }
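/*	Editor's worked example (not part of the original source): with the
	descriptor's file position at 100, the request
	{ l_whence = SEEK_CUR, l_start = 50, l_len = -30 } first becomes
	l_start = 150; the negative length then reverses the region to
	{ l_start = 120, l_len = 30 }, i.e. bytes [120, 149]. An l_len of 0 is
	expanded to OFF_MAX, meaning "up to the end of the file".
*/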
1885 
1886 
1887 static void
1888 replace_vnode_if_disconnected(struct fs_mount* mount,
1889 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1890 	struct vnode* fallBack, bool lockRootLock)
1891 {
1892 	struct vnode* givenVnode = vnode;
1893 	bool vnodeReplaced = false;
1894 
1895 	ReadLocker vnodeReadLocker(sVnodeLock);
1896 
1897 	if (lockRootLock)
1898 		mutex_lock(&sIOContextRootLock);
1899 
1900 	while (vnode != NULL && vnode->mount == mount
1901 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1902 		if (vnode->covers != NULL) {
1903 			// redirect the vnode to the covered vnode
1904 			vnode = vnode->covers;
1905 		} else
1906 			vnode = fallBack;
1907 
1908 		vnodeReplaced = true;
1909 	}
1910 
1911 	// If we've replaced the node, grab a reference for the new one.
1912 	if (vnodeReplaced && vnode != NULL)
1913 		inc_vnode_ref_count(vnode);
1914 
1915 	if (lockRootLock)
1916 		mutex_unlock(&sIOContextRootLock);
1917 
1918 	vnodeReadLocker.Unlock();
1919 
1920 	if (vnodeReplaced)
1921 		put_vnode(givenVnode);
1922 }
1923 
1924 
1925 /*!	Disconnects all file descriptors that are associated with the
1926 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1927 	\a mount object.
1928 
1929 	Note, after you've called this function, there might still be ongoing
1930 	accesses - they won't be interrupted if they already happened before.
1931 	However, any subsequent access will fail.
1932 
1933 	This is not a cheap function and should be used with care and rarely.
1934 	TODO: there is currently no means to stop a blocking read/write!
1935 */
1936 static void
1937 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1938 	struct vnode* vnodeToDisconnect)
1939 {
1940 	// iterate over all teams and peek into their file descriptors
1941 	TeamListIterator teamIterator;
1942 	while (Team* team = teamIterator.Next()) {
1943 		BReference<Team> teamReference(team, true);
1944 		TeamLocker teamLocker(team);
1945 
1946 		// lock the I/O context
1947 		io_context* context = team->io_context;
1948 		if (context == NULL)
1949 			continue;
1950 		WriteLocker contextLocker(context->lock);
1951 
1952 		teamLocker.Unlock();
1953 
1954 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1955 			sRoot, true);
1956 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1957 			sRoot, false);
1958 
1959 		for (uint32 i = 0; i < context->table_size; i++) {
1960 			struct file_descriptor* descriptor = context->fds[i];
1961 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1962 				continue;
1963 
1964 			inc_fd_ref_count(descriptor);
1965 
1966 			// if this descriptor points at this mount, we
1967 			// need to disconnect it to be able to unmount
1968 			struct vnode* vnode = fd_vnode(descriptor);
1969 			if (vnodeToDisconnect != NULL) {
1970 				if (vnode == vnodeToDisconnect)
1971 					disconnect_fd(descriptor);
1972 			} else if ((vnode != NULL && vnode->mount == mount)
1973 				|| (vnode == NULL && descriptor->u.mount == mount))
1974 				disconnect_fd(descriptor);
1975 
1976 			put_fd(descriptor);
1977 		}
1978 	}
1979 }
1980 
1981 
1982 /*!	\brief Gets the root node of the current IO context.
1983 	If \a kernel is \c true, the kernel IO context will be used.
1984 	The caller obtains a reference to the returned node.
1985 */
1986 struct vnode*
1987 get_root_vnode(bool kernel)
1988 {
1989 	if (!kernel) {
1990 		// Get current working directory from io context
1991 		struct io_context* context = get_current_io_context(kernel);
1992 
1993 		mutex_lock(&sIOContextRootLock);
1994 
1995 		struct vnode* root = context->root;
1996 		if (root != NULL)
1997 			inc_vnode_ref_count(root);
1998 
1999 		mutex_unlock(&sIOContextRootLock);
2000 
2001 		if (root != NULL)
2002 			return root;
2003 
2004 		// That should never happen.
2005 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2006 			"have a root\n", team_get_current_team_id());
2007 	}
2008 
2009 	inc_vnode_ref_count(sRoot);
2010 	return sRoot;
2011 }
2012 
2013 
2014 /*!	\brief Gets the directory path and leaf name for a given path.
2015 
2016 	The supplied \a path is transformed to refer to the directory part of
2017 	the entry identified by the original path, and into the buffer \a filename
2018 	the leaf name of the original entry is written.
2019 	Neither the returned path nor the leaf name can be expected to be
2020 	canonical.
2021 
2022 	\param path The path to be analyzed. Must be able to store at least one
2023 		   additional character.
2024 	\param filename The buffer into which the leaf name will be written.
2025 		   Must be of size B_FILE_NAME_LENGTH at least.
2026 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2027 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2028 		   if the given path name is empty.
2029 */
2030 static status_t
2031 get_dir_path_and_leaf(char* path, char* filename)
2032 {
2033 	if (*path == '\0')
2034 		return B_ENTRY_NOT_FOUND;
2035 
2036 	char* last = strrchr(path, '/');
2037 		// '/' is not allowed in file names!
2038 
2039 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2040 
2041 	if (last == NULL) {
2042 		// this path is a single segment with no '/' in it
2043 		// ex. "foo"
2044 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2045 			return B_NAME_TOO_LONG;
2046 
2047 		strcpy(path, ".");
2048 	} else {
2049 		last++;
2050 		if (last[0] == '\0') {
2051 			// special case: the path ends in one or more '/' - remove them
2052 			while (*--last == '/' && last != path);
2053 			last[1] = '\0';
2054 
2055 			if (last == path && last[0] == '/') {
2056 				// This path points to the root of the file system
2057 				strcpy(filename, ".");
2058 				return B_OK;
2059 			}
2060 			for (; last != path && *(last - 1) != '/'; last--);
2061 				// rewind to the start of the leaf before the '/'
2062 		}
2063 
2064 		// normal leaf: replace the leaf portion of the path with a '.'
2065 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2066 			return B_NAME_TOO_LONG;
2067 
2068 		last[0] = '.';
2069 		last[1] = '\0';
2070 	}
2071 	return B_OK;
2072 }
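/*	Editor's examples (not part of the original source) of the in-place
	transformation performed by get_dir_path_and_leaf():

		"/a/b/c"   -> path "/a/b/.", filename "c"
		"/a/b/c//" -> path "/a/b/.", filename "c"
		"foo"      -> path ".",      filename "foo"
		"/"        -> path "/",      filename "."
*/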
2073 
2074 
2075 static status_t
2076 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2077 	bool traverse, bool kernel, VnodePutter& _vnode)
2078 {
2079 	char clonedName[B_FILE_NAME_LENGTH + 1];
2080 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2081 		return B_NAME_TOO_LONG;
2082 
2083 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2084 	struct vnode* directory;
2085 
2086 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2087 	if (status < 0)
2088 		return status;
2089 
2090 	return vnode_path_to_vnode(directory, clonedName, traverse, kernel,
2091 		_vnode, NULL);
2092 }
2093 
2094 
2095 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2096 	and returns the respective vnode.
2097 	On success a reference to the vnode is acquired for the caller.
2098 */
2099 static status_t
2100 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2101 {
2102 	ino_t id;
2103 	bool missing;
2104 
2105 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2106 		return missing ? B_ENTRY_NOT_FOUND
2107 			: get_vnode(dir->device, id, _vnode, true, false);
2108 	}
2109 
2110 	status_t status = FS_CALL(dir, lookup, name, &id);
2111 	if (status != B_OK)
2112 		return status;
2113 
2114 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2115 	// have a reference and just need to look the node up.
2116 	rw_lock_read_lock(&sVnodeLock);
2117 	*_vnode = lookup_vnode(dir->device, id);
2118 	rw_lock_read_unlock(&sVnodeLock);
2119 
2120 	if (*_vnode == NULL) {
2121 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2122 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2123 		return B_ENTRY_NOT_FOUND;
2124 	}
2125 
2126 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2127 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2128 //		(*_vnode)->mount->id, (*_vnode)->id);
2129 
2130 	return B_OK;
2131 }
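/*	Editor's note (not part of the original source): the entry cache checked
	above also records negative ("missing") entries, so repeated lookups of
	nonexistent names are answered with B_ENTRY_NOT_FOUND without calling
	into the file system's lookup() hook at all.
*/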
2132 
2133 
2134 /*!	Returns the vnode for the relative \a path starting at the specified \a vnode.
2135 
2136 	\param[in,out] path The relative path being searched. Must not be NULL.
2137 	If the function returns successfully, \a path contains the name of the last path
2138 	component. This function clobbers the buffer pointed to by \a path only
2139 	if it does contain more than one component.
2140 
2141 	If the function fails and \a leafName is not NULL, \a _vnode contains the
2142 	last directory; the caller has the responsibility to call put_vnode() on it.
2143 
2144 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2145 	it is successful or not!
2146 
2147 	\param[out] _vnode If the function returns B_OK, points to the found node.
2148 	\param[out] _vnode If the function returns something else and \a leafName is not
2149 		NULL: set to the last existing directory in the path. The caller has the
2150 		responsibility to release it using put_vnode().
2151 	\param[out] _vnode If the function returns something else and \a leafName is NULL: not used.
2152 */
2153 static status_t
2154 vnode_path_to_vnode(struct vnode* start, char* path, bool traverseLeafLink,
2155 	int count, struct io_context* ioContext, VnodePutter& _vnode,
2156 	ino_t* _parentID, char* leafName)
2157 {
2158 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", start, path));
2159 	ASSERT(!_vnode.IsSet());
2160 
2161 	VnodePutter vnode(start);
2162 
2163 	if (path == NULL)
2164 		return B_BAD_VALUE;
2165 	if (*path == '\0')
2166 		return B_ENTRY_NOT_FOUND;
2167 
2168 	status_t status = B_OK;
2169 	ino_t lastParentID = vnode->id;
2170 	while (true) {
2171 		char* nextPath;
2172 
2173 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2174 			path));
2175 
2176 		// done?
2177 		if (path[0] == '\0')
2178 			break;
2179 
2180 		// walk to find the next path component ("path" will point to a single
2181 		// path component), and filter out multiple slashes
2182 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2183 				nextPath++);
2184 
2185 		bool directoryFound = false;
2186 		if (*nextPath == '/') {
2187 			directoryFound = true;
2188 			*nextPath = '\0';
2189 			do
2190 				nextPath++;
2191 			while (*nextPath == '/');
2192 		}
2193 
2194 		// If the '..' is at a covering vnode, move to the covered
2195 		// vnode so we pass the '..' path to the underlying filesystem.
2196 		// Also prevent escaping the root of the IO context.
2197 		if (strcmp("..", path) == 0) {
2198 			if (vnode.Get() == ioContext->root) {
2199 				// Attempted prison break! Keep it contained.
2200 				path = nextPath;
2201 				continue;
2202 			}
2203 
2204 			if (Vnode* coveredVnode = get_covered_vnode(vnode.Get()))
2205 				vnode.SetTo(coveredVnode);
2206 		}
2207 
2208 		// check if vnode is really a directory
2209 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2210 			status = B_NOT_A_DIRECTORY;
2211 
2212 		// Check if we have the right to search the current directory vnode.
2213 		// If a file system doesn't have the access() function, we assume that
2214 		// searching a directory is always allowed
2215 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2216 			status = FS_CALL(vnode.Get(), access, X_OK);
2217 
2218 		// Tell the filesystem to get the vnode of this path component (if we
2219 		// got the permission from the call above)
2220 		VnodePutter nextVnode;
2221 		if (status == B_OK) {
2222 			struct vnode* temp = NULL;
2223 			status = lookup_dir_entry(vnode.Get(), path, &temp);
2224 			nextVnode.SetTo(temp);
2225 		}
2226 
2227 		if (status != B_OK) {
2228 			if (leafName != NULL) {
2229 				strlcpy(leafName, path, B_FILE_NAME_LENGTH);
2230 				_vnode.SetTo(vnode.Detach());
2231 			}
2232 			return status;
2233 		}
2234 
2235 		// If the new node is a symbolic link, resolve it (if we've been told
2236 		// to do it)
2237 		if (S_ISLNK(nextVnode->Type())
2238 			&& (traverseLeafLink || directoryFound)) {
2239 			size_t bufferSize;
2240 			char* buffer;
2241 
2242 			TRACE(("traverse link\n"));
2243 
2244 			if (count + 1 > B_MAX_SYMLINKS)
2245 				return B_LINK_LIMIT;
2246 
2247 			bufferSize = B_PATH_NAME_LENGTH;
2248 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2249 			if (buffer == NULL)
2250 				return B_NO_MEMORY;
2251 
2252 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2253 				bufferSize--;
2254 				status = FS_CALL(nextVnode.Get(), read_symlink, buffer, &bufferSize);
2255 				// null-terminate
2256 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2257 					buffer[bufferSize] = '\0';
2258 			} else
2259 				status = B_BAD_VALUE;
2260 
2261 			if (status != B_OK) {
2262 				object_cache_free(sPathNameCache, buffer, 0);
2263 				return status;
2264 			}
2265 			nextVnode.Unset();
2266 
2267 			// Check if we start from the root directory or the current
2268 			// directory ("vnode" still points to that one).
2269 			// Cut off all leading slashes if it's the root directory
2270 			path = buffer;
2271 			bool absoluteSymlink = false;
2272 			if (path[0] == '/') {
2273 				// we don't need the old directory anymore
2274 				vnode.Unset();
2275 
2276 				while (*++path == '/')
2277 					;
2278 
2279 				mutex_lock(&sIOContextRootLock);
2280 				vnode.SetTo(ioContext->root);
2281 				inc_vnode_ref_count(vnode.Get());
2282 				mutex_unlock(&sIOContextRootLock);
2283 
2284 				absoluteSymlink = true;
2285 			}
2286 
2287 			inc_vnode_ref_count(vnode.Get());
2288 				// balance the next recursion - we will decrement the
2289 				// ref_count of the vnode, no matter if we succeeded or not
2290 
2291 			if (absoluteSymlink && *path == '\0') {
2292 				// symlink was just "/"
2293 				nextVnode.SetTo(vnode.Get());
2294 			} else {
2295 				status = vnode_path_to_vnode(vnode.Get(), path, true, count + 1,
2296 					ioContext, nextVnode, &lastParentID, leafName);
2297 			}
2298 
2299 			object_cache_free(sPathNameCache, buffer, 0);
2300 
2301 			if (status != B_OK) {
2302 				if (leafName != NULL)
2303 					_vnode.SetTo(nextVnode.Detach());
2304 				return status;
2305 			}
2306 		} else
2307 			lastParentID = vnode->id;
2308 
2309 		// decrease the ref count on the old dir we just looked up into
2310 		vnode.Unset();
2311 
2312 		path = nextPath;
2313 		vnode.SetTo(nextVnode.Detach());
2314 
2315 		// see if we hit a covered node
2316 		if (Vnode* coveringNode = get_covering_vnode(vnode.Get()))
2317 			vnode.SetTo(coveringNode);
2318 	}
2319 
2320 	_vnode.SetTo(vnode.Detach());
2321 	if (_parentID)
2322 		*_parentID = lastParentID;
2323 
2324 	return B_OK;
2325 }
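/*	Editor's note with a hedged example (not part of the original source):
	the loop above resolves one component per iteration. For a chroot'ed
	IO context, an absolute symlink such as "/etc/passwd" restarts at
	ioContext->root rather than the global root, and a ".." evaluated at
	ioContext->root is swallowed, so the walk can never escape the
	context's root directory.
*/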
2326 
2327 
2328 static status_t
2329 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2330 	bool kernel, VnodePutter& _vnode, ino_t* _parentID, char* leafName)
2331 {
2332 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2333 		get_current_io_context(kernel), _vnode, _parentID, leafName);
2334 }
2335 
2336 
2337 static status_t
2338 path_to_vnode(char* path, bool traverseLink, VnodePutter& _vnode,
2339 	ino_t* _parentID, bool kernel)
2340 {
2341 	struct vnode* start = NULL;
2342 
2343 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2344 
2345 	if (!path)
2346 		return B_BAD_VALUE;
2347 
2348 	if (*path == '\0')
2349 		return B_ENTRY_NOT_FOUND;
2350 
2351 	// figure out if we need to start at root or at cwd
2352 	if (*path == '/') {
2353 		if (sRoot == NULL) {
2354 			// we're a bit early, aren't we?
2355 			return B_ERROR;
2356 		}
2357 
2358 		while (*++path == '/')
2359 			;
2360 		start = get_root_vnode(kernel);
2361 
2362 		if (*path == '\0') {
2363 			_vnode.SetTo(start);
2364 			return B_OK;
2365 		}
2366 	} else {
2367 		const struct io_context* context = get_current_io_context(kernel);
2368 
2369 		rw_lock_read_lock(&context->lock);
2370 		start = context->cwd;
2371 		if (start != NULL)
2372 			inc_vnode_ref_count(start);
2373 		rw_lock_read_unlock(&context->lock);
2374 
2375 		if (start == NULL)
2376 			return B_ERROR;
2377 	}
2378 
2379 	return vnode_path_to_vnode(start, path, traverseLink, kernel, _vnode,
2380 		_parentID);
2381 }
2382 
2383 
2384 /*! Returns the vnode in the next to last segment of the path, and returns
2385 	the last portion in filename.
2386 	The path buffer must be able to store at least one additional character.
2387 */
2388 static status_t
2389 path_to_dir_vnode(char* path, VnodePutter& _vnode, char* filename,
2390 	bool kernel)
2391 {
2392 	status_t status = get_dir_path_and_leaf(path, filename);
2393 	if (status != B_OK)
2394 		return status;
2395 
2396 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2397 }
2398 
2399 
2400 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2401 		   to by a FD + path pair.
2402 
2403 	\a path must be given in either case. \a fd might be omitted, in which
2404 	case \a path is either an absolute path or one relative to the current
2405 	directory. If both are supplied and \a path is relative, it is reckoned
2406 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2407 	is ignored.
2408 
2409 	The caller has the responsibility to call put_vnode() on the returned
2410 	directory vnode.
2411 
2412 	\param fd The FD. May be < 0.
2413 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2414 	       is modified by this function. It must have at least room for a
2415 	       string one character longer than the path it contains.
2416 	\param _vnode A pointer to a variable the directory vnode shall be written
2417 		   into.
2418 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2419 		   the leaf name of the specified entry will be written.
2420 	\param kernel \c true, if invoked from inside the kernel, \c false if
2421 		   invoked from userland.
2422 	\return \c B_OK, if everything went fine, another error code otherwise.
2423 */
2424 static status_t
2425 fd_and_path_to_dir_vnode(int fd, char* path, VnodePutter& _vnode,
2426 	char* filename, bool kernel)
2427 {
2428 	if (!path)
2429 		return B_BAD_VALUE;
2430 	if (*path == '\0')
2431 		return B_ENTRY_NOT_FOUND;
2432 	if (fd == AT_FDCWD || fd == -1 || *path == '/')
2433 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2434 
2435 	status_t status = get_dir_path_and_leaf(path, filename);
2436 	if (status != B_OK)
2437 		return status;
2438 
2439 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2440 }
2441 
2442 
2443 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2444 		   to by a vnode + path pair.
2445 
2446 	\a path must be given in either case. \a vnode might be omitted, in which
2447 	case \a path is either an absolute path or one relative to the current
2448 	directory. If both are supplied and \a path is relative, it is reckoned
2449 	off of the directory referred to by \a vnode. If \a path is absolute,
2450 	\a vnode is ignored.
2451 
2452 	The caller has the responsibility to call put_vnode() on the returned
2453 	directory vnode.
2454 
2455 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2456 	it is successful or not.
2457 
2458 	\param vnode The vnode. May be \c NULL.
2459 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2460 	       is modified by this function. It must have at least room for a
2461 	       string one character longer than the path it contains.
2462 	\param _vnode A pointer to a variable the directory vnode shall be written
2463 		   into.
2464 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2465 		   the leaf name of the specified entry will be written.
2466 	\param kernel \c true, if invoked from inside the kernel, \c false if
2467 		   invoked from userland.
2468 	\return \c B_OK, if everything went fine, another error code otherwise.
2469 */
2470 static status_t
2471 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2472 	VnodePutter& _vnode, char* filename, bool kernel)
2473 {
2474 	VnodePutter vnodePutter(vnode);
2475 
2476 	if (!path)
2477 		return B_BAD_VALUE;
2478 	if (*path == '\0')
2479 		return B_ENTRY_NOT_FOUND;
2480 	if (vnode == NULL || path[0] == '/')
2481 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2482 
2483 	status_t status = get_dir_path_and_leaf(path, filename);
2484 	if (status != B_OK)
2485 		return status;
2486 
2487 	vnodePutter.Detach();
2488 	return vnode_path_to_vnode(vnode, path, true, kernel, _vnode, NULL);
2489 }
2490 
2491 
2492 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2493 */
2494 static status_t
2495 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2496 	size_t bufferSize, struct io_context* ioContext)
2497 {
2498 	if (bufferSize < sizeof(struct dirent))
2499 		return B_BAD_VALUE;
2500 
2501 	// See if the vnode is covering another vnode and move to the covered
2502 	// vnode so we get the underlying file system
2503 	VnodePutter vnodePutter;
2504 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2505 		vnode = coveredVnode;
2506 		vnodePutter.SetTo(vnode);
2507 	}
2508 
2509 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2510 		// The FS supports getting the name of a vnode.
2511 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2512 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2513 			return B_OK;
2514 	}
2515 
2516 	// The FS doesn't support getting the name of a vnode. So we search the
2517 	// parent directory for the vnode, if the caller let us.
2518 
2519 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2520 		return B_UNSUPPORTED;
2521 
2522 	void* cookie;
2523 
2524 	status_t status = FS_CALL(parent, open_dir, &cookie);
2525 	if (status >= B_OK) {
2526 		while (true) {
2527 			uint32 num = 1;
2528 			// We use the FS hook directly instead of dir_read(), since we don't
2529 			// want the entries to be fixed. We have already resolved vnode to
2530 			// the covered node.
2531 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2532 				&num);
2533 			if (status != B_OK)
2534 				break;
2535 			if (num == 0) {
2536 				status = B_ENTRY_NOT_FOUND;
2537 				break;
2538 			}
2539 
2540 			if (vnode->id == buffer->d_ino) {
2541 				// found correct entry!
2542 				break;
2543 			}
2544 		}
2545 
2546 		FS_CALL(parent, close_dir, cookie);
2547 		FS_CALL(parent, free_dir_cookie, cookie);
2548 	}
2549 	return status;
2550 }
2551 
2552 
2553 static status_t
2554 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2555 	size_t nameSize, bool kernel)
2556 {
2557 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2558 	struct dirent* dirent = (struct dirent*)buffer;
2559 
2560 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2561 		get_current_io_context(kernel));
2562 	if (status != B_OK)
2563 		return status;
2564 
2565 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2566 		return B_BUFFER_OVERFLOW;
2567 
2568 	return B_OK;
2569 }
2570 
2571 
2572 /*!	Gets the full path to a given directory vnode.
2573 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2574 	file system doesn't support this call, it will fall back to iterating
2575 	through the parent directory to get the name of the child.
2576 
2577 	To protect against circular loops, it supports a maximum tree depth
2578 	of 256 levels.
2579 
2580 	Note that the path may no longer be correct by the time this function
2581 	returns! It doesn't use any locking to ensure that the returned path stays
2582 	valid, as paths aren't safe anyway: the path to a file can change at any time.
2583 
2584 	It might be a good idea, though, to check if the returned path exists
2585 	in the calling function (it's not done here because of efficiency)
2586 */
2587 static status_t
2588 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2589 	bool kernel)
2590 {
2591 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2592 
2593 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2594 		return B_BAD_VALUE;
2595 
2596 	if (!S_ISDIR(vnode->Type()))
2597 		return B_NOT_A_DIRECTORY;
2598 
2599 	char* path = buffer;
2600 	int32 insert = bufferSize;
2601 	int32 maxLevel = 256;
2602 	int32 length;
2603 	status_t status = B_OK;
2604 	struct io_context* ioContext = get_current_io_context(kernel);
2605 
2606 	// we don't use get_vnode() here because this call is more
2607 	// efficient and does all we need from get_vnode()
2608 	inc_vnode_ref_count(vnode);
2609 
2610 	path[--insert] = '\0';
2611 		// the path is filled right to left
2612 
2613 	while (true) {
2614 		// If the node is the context's root, bail out. Otherwise resolve mount
2615 		// points.
2616 		if (vnode == ioContext->root)
2617 			break;
2618 
2619 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2620 			put_vnode(vnode);
2621 			vnode = coveredVnode;
2622 		}
2623 
2624 		// lookup the parent vnode
2625 		struct vnode* parentVnode;
2626 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2627 		if (status != B_OK)
2628 			goto out;
2629 
2630 		if (parentVnode == vnode) {
2631 			// The caller apparently got their hands on a node outside of their
2632 			// context's root. Now we've hit the global root.
2633 			put_vnode(parentVnode);
2634 			break;
2635 		}
2636 
2637 		// get the node's name
2638 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2639 			// also used for fs_read_dir()
2640 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2641 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2642 			sizeof(nameBuffer), ioContext);
2643 
2644 		// release the current vnode, we only need its parent from now on
2645 		put_vnode(vnode);
2646 		vnode = parentVnode;
2647 
2648 		if (status != B_OK)
2649 			goto out;
2650 
2651 		// TODO: add an explicit check for loops in about 10 levels to do
2652 		// real loop detection
2653 
2654 		// don't go deeper than 'maxLevel' to prevent circular loops
2655 		if (maxLevel-- < 0) {
2656 			status = B_LINK_LIMIT;
2657 			goto out;
2658 		}
2659 
2660 		// add the name in front of the current path
2661 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2662 		length = strlen(name);
2663 		insert -= length;
2664 		if (insert <= 0) {
2665 			status = B_RESULT_NOT_REPRESENTABLE;
2666 			goto out;
2667 		}
2668 		memcpy(path + insert, name, length);
2669 		path[--insert] = '/';
2670 	}
2671 
2672 	// the root dir will result in an empty path: fix it
2673 	if (path[insert] == '\0')
2674 		path[--insert] = '/';
2675 
2676 	TRACE(("  path is: %s\n", path + insert));
2677 
2678 	// move the path to the start of the buffer
2679 	length = bufferSize - insert;
2680 	memmove(buffer, path + insert, length);
2681 
2682 out:
2683 	put_vnode(vnode);
2684 	return status;
2685 }
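/*	Editor's illustrative sketch (not part of the original source): the path
	is assembled right to left, e.g. for /boot/home in a 16-byte buffer:

		"...............\0"  insert = 15 (terminator)
		"...........home\0"  leaf "home" prepended
		"......boot/home\0"  '/' and leaf "boot" prepended
		"...../boot/home\0"  leading '/' added; loop ends at the root
		-> memmove() then shifts "/boot/home" to the buffer start
*/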
2686 
2687 
2688 /*!	Checks the length of every path component, and adds a '.'
2689 	if the path ends in a slash.
2690 	The given path buffer must be able to store at least one
2691 	additional character.
2692 */
2693 static status_t
2694 check_path(char* to)
2695 {
2696 	int32 length = 0;
2697 
2698 	// check length of every path component
2699 
2700 	while (*to) {
2701 		char* begin;
2702 		if (*to == '/')
2703 			to++, length++;
2704 
2705 		begin = to;
2706 		while (*to != '/' && *to)
2707 			to++, length++;
2708 
2709 		if (to - begin > B_FILE_NAME_LENGTH)
2710 			return B_NAME_TOO_LONG;
2711 	}
2712 
2713 	if (length == 0)
2714 		return B_ENTRY_NOT_FOUND;
2715 
2716 	// complete path if there is a slash at the end
2717 
2718 	if (*(to - 1) == '/') {
2719 		if (length > B_PATH_NAME_LENGTH - 2)
2720 			return B_NAME_TOO_LONG;
2721 
2722 		to[0] = '.';
2723 		to[1] = '\0';
2724 	}
2725 
2726 	return B_OK;
2727 }
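/*	Editor's examples (not part of the original source): check_path() rejects
	any single component longer than B_FILE_NAME_LENGTH and completes a
	trailing slash in place:

		"/a/b/"  -> "/a/b/." and B_OK
		""       -> B_ENTRY_NOT_FOUND
		"/<component longer than B_FILE_NAME_LENGTH>/x" -> B_NAME_TOO_LONG
*/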
2728 
2729 
2730 static struct file_descriptor*
2731 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2732 {
2733 	struct file_descriptor* descriptor
2734 		= get_fd(get_current_io_context(kernel), fd);
2735 	if (descriptor == NULL)
2736 		return NULL;
2737 
2738 	struct vnode* vnode = fd_vnode(descriptor);
2739 	if (vnode == NULL) {
2740 		put_fd(descriptor);
2741 		return NULL;
2742 	}
2743 
2744 	// ToDo: when we can close a file descriptor at any point, investigate
2745 	//	if this is still valid to do (accessing the vnode without ref_count
2746 	//	or locking)
2747 	*_vnode = vnode;
2748 	return descriptor;
2749 }
2750 
2751 
2752 static struct vnode*
2753 get_vnode_from_fd(int fd, bool kernel)
2754 {
2755 	struct file_descriptor* descriptor;
2756 	struct vnode* vnode;
2757 
2758 	descriptor = get_fd(get_current_io_context(kernel), fd);
2759 	if (descriptor == NULL)
2760 		return NULL;
2761 
2762 	vnode = fd_vnode(descriptor);
2763 	if (vnode != NULL)
2764 		inc_vnode_ref_count(vnode);
2765 
2766 	put_fd(descriptor);
2767 	return vnode;
2768 }
2769 
2770 
2771 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2772 	only the path will be considered. In this case, the \a path must not be
2773 	NULL.
2774 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2775 	and should be NULL for files.
2776 */
2777 static status_t
2778 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2779 	VnodePutter& _vnode, ino_t* _parentID, bool kernel)
2780 {
2781 	if (fd < 0 && !path)
2782 		return B_BAD_VALUE;
2783 
2784 	if (path != NULL && *path == '\0')
2785 		return B_ENTRY_NOT_FOUND;
2786 
2787 	if ((fd == AT_FDCWD || fd == -1) || (path != NULL && path[0] == '/')) {
2788 		// no FD or absolute path
2789 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2790 	}
2791 
2792 	// FD only, or FD + relative path
2793 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2794 	if (vnode == NULL)
2795 		return B_FILE_ERROR;
2796 
2797 	if (path != NULL) {
2798 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, kernel,
2799 			_vnode, _parentID);
2800 	}
2801 
2802 	// there is no relative path to take into account
2803 
2804 	_vnode.SetTo(vnode);
2805 	if (_parentID)
2806 		*_parentID = -1;
2807 
2808 	return B_OK;
2809 }
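/*	Editor's illustrative sketch (not part of the original source): the
	FD + path combinations accepted above mirror the POSIX *at() calls.
	The names dirFD, absolutePath and relativePath are hypothetical.

		VnodePutter vnode;
		ino_t parentID;
		// absolute path: the FD is ignored
		fd_and_path_to_vnode(-1, absolutePath, true, vnode, &parentID, kernel);
		// relative path: resolved against the directory behind dirFD
		fd_and_path_to_vnode(dirFD, relativePath, true, vnode, &parentID, kernel);
		// no path: yields the vnode of dirFD itself (parentID set to -1)
		fd_and_path_to_vnode(dirFD, NULL, true, vnode, &parentID, kernel);
*/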
2810 
2811 
2812 struct vnode*
2813 fd_vnode(struct file_descriptor* descriptor)
2814 {
2815 	if (descriptor->ops == &sFileOps
2816 			|| descriptor->ops == &sDirectoryOps
2817 			|| descriptor->ops == &sAttributeOps
2818 			|| descriptor->ops == &sAttributeDirectoryOps)
2819 		return descriptor->u.vnode;
2820 
2821 	return NULL;
2822 }
2823 
2824 
2825 bool
2826 fd_is_file(struct file_descriptor* descriptor)
2827 {
2828 	return descriptor->ops == &sFileOps;
2829 }
2830 
2831 
2832 static int
2833 get_new_fd(struct fd_ops* ops, struct fs_mount* mount, struct vnode* vnode,
2834 	void* cookie, int openMode, bool kernel)
2835 {
2836 	struct file_descriptor* descriptor;
2837 	int fd;
2838 
2839 	// If the vnode is locked, we don't allow creating a new file/directory
2840 	// file_descriptor for it
2841 	if (vnode && vnode->mandatory_locked_by != NULL
2842 		&& (ops == &sFileOps || ops == &sDirectoryOps))
2843 		return B_BUSY;
2844 
2845 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2846 		return B_BAD_VALUE;
2847 
2848 	descriptor = alloc_fd();
2849 	if (!descriptor)
2850 		return B_NO_MEMORY;
2851 
2852 	if (vnode)
2853 		descriptor->u.vnode = vnode;
2854 	else
2855 		descriptor->u.mount = mount;
2856 	descriptor->cookie = cookie;
2857 
2858 	descriptor->ops = ops;
2859 	descriptor->open_mode = openMode;
2860 
2861 	if (descriptor->ops->fd_seek != NULL) {
2862 		// some kinds of files are not seekable
2863 		switch (vnode->Type() & S_IFMT) {
2864 			case S_IFIFO:
2865 			case S_IFSOCK:
2866 				ASSERT(descriptor->pos == -1);
2867 				break;
2868 
2869 			// The Open Group Base Specs don't single out any file types besides
2870 			// pipes, FIFOs, and sockets, so we allow seeking all others.
2871 			default:
2872 				descriptor->pos = 0;
2873 				break;
2874 		}
2875 	}
2876 
2877 	io_context* context = get_current_io_context(kernel);
2878 	fd = new_fd(context, descriptor);
2879 	if (fd < 0) {
2880 		descriptor->ops = NULL;
2881 		put_fd(descriptor);
2882 		return B_NO_MORE_FDS;
2883 	}
2884 
2885 	rw_lock_write_lock(&context->lock);
2886 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2887 	rw_lock_write_unlock(&context->lock);
2888 
2889 	return fd;
2890 }
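/*	Editor's illustrative sketch (not part of the original source): a typical
	open path is expected to end with a call like the following, where vnode,
	cookie and openMode are whatever the caller obtained beforehand.

		int fd = get_new_fd(&sFileOps, NULL, vnode, cookie, openMode, kernel);
		if (fd < 0) {
			// the caller still owns the vnode reference and the FS cookie
			// (descriptor->ops was reset, so put_fd() ran no close hooks)
		}

	On success, ownership of both passes to the new descriptor and they are
	released when its last reference is put.
*/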
2891 
2892 
2893 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2894 	vfs_normalize_path(). See there for more documentation.
2895 */
2896 static status_t
2897 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2898 {
2899 	VnodePutter dir;
2900 	status_t error;
2901 
2902 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2903 		// get dir vnode + leaf name
2904 		char leaf[B_FILE_NAME_LENGTH];
2905 		error = vnode_and_path_to_dir_vnode(dir.Detach(), path, dir, leaf, kernel);
2906 		if (error != B_OK)
2907 			return error;
2908 		strcpy(path, leaf);
2909 
2910 		// get file vnode, if we shall resolve links
2911 		bool fileExists = false;
2912 		VnodePutter fileVnode;
2913 		if (traverseLink) {
2914 			inc_vnode_ref_count(dir.Get());
2915 			if (vnode_path_to_vnode(dir.Get(), path, false, kernel, fileVnode,
2916 					NULL) == B_OK) {
2917 				fileExists = true;
2918 			}
2919 		}
2920 
2921 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2922 			// we're done -- construct the path
2923 			bool hasLeaf = true;
2924 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2925 				// special cases "." and ".." -- get the dir, forget the leaf
2926 				error = vnode_path_to_vnode(dir.Detach(), leaf, false, kernel,
2927 					dir, NULL);
2928 				if (error != B_OK)
2929 					return error;
2930 				hasLeaf = false;
2931 			}
2932 
2933 			// get the directory path
2934 			error = dir_vnode_to_path(dir.Get(), path, B_PATH_NAME_LENGTH, kernel);
2935 			if (error != B_OK)
2936 				return error;
2937 
2938 			// append the leaf name
2939 			if (hasLeaf) {
2940 				// insert a directory separator if this is not the file system
2941 				// root
2942 				if ((strcmp(path, "/") != 0
2943 					&& strlcat(path, "/", pathSize) >= pathSize)
2944 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2945 					return B_NAME_TOO_LONG;
2946 				}
2947 			}
2948 
2949 			return B_OK;
2950 		}
2951 
2952 		// read link
2953 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2954 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2955 			error = FS_CALL(fileVnode.Get(), read_symlink, path, &bufferSize);
2956 			if (error != B_OK)
2957 				return error;
2958 			if (bufferSize < B_PATH_NAME_LENGTH)
2959 				path[bufferSize] = '\0';
2960 		} else
2961 			return B_BAD_VALUE;
2962 	}
2963 
2964 	return B_LINK_LIMIT;
2965 }
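/*	Editor's worked example (not part of the original source), assuming
	/tmp/link is a symlink with target "b": normalizing "/tmp//link" with
	traverseLink == true splits off the leaf ("link"), finds it to be a
	symlink, restarts with the target, and finally yields "/tmp/b". At most
	B_MAX_SYMLINKS indirections are followed before B_LINK_LIMIT is
	returned.
*/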
2966 
2967 
2968 static status_t
2969 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2970 	struct io_context* ioContext)
2971 {
2972 	// Make sure the IO context root is not bypassed.
2973 	if (parent == ioContext->root) {
2974 		*_device = parent->device;
2975 		*_node = parent->id;
2976 		return B_OK;
2977 	}
2978 
2979 	inc_vnode_ref_count(parent);
2980 		// vnode_path_to_vnode() puts the node
2981 
2982 	// ".." is guaranteed not to be clobbered by this call
2983 	VnodePutter vnode;
2984 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false,
2985 		ioContext, vnode, NULL);
2986 	if (status == B_OK) {
2987 		*_device = vnode->device;
2988 		*_node = vnode->id;
2989 	}
2990 
2991 	return status;
2992 }
2993 
2994 
2995 #ifdef ADD_DEBUGGER_COMMANDS
2996 
2997 
2998 static void
2999 _dump_advisory_locking(advisory_locking* locking)
3000 {
3001 	if (locking == NULL)
3002 		return;
3003 
3004 	kprintf("   lock:        %" B_PRId32, locking->lock);
3005 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
3006 
3007 	int32 index = 0;
3008 	LockList::Iterator iterator = locking->locks.GetIterator();
3009 	while (iterator.HasNext()) {
3010 		struct advisory_lock* lock = iterator.Next();
3011 
3012 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3013 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3014 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3015 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3016 	}
3017 }
3018 
3019 
3020 static void
3021 _dump_mount(struct fs_mount* mount)
3022 {
3023 	kprintf("MOUNT: %p\n", mount);
3024 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3025 	kprintf(" device_name:   %s\n", mount->device_name);
3026 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3027 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3028 	kprintf(" partition:     %p\n", mount->partition);
3029 	kprintf(" lock:          %p\n", &mount->lock);
3030 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3031 		mount->owns_file_device ? " owns_file_device" : "");
3032 
3033 	fs_volume* volume = mount->volume;
3034 	while (volume != NULL) {
3035 		kprintf(" volume %p:\n", volume);
3036 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3037 		kprintf("  private_volume:   %p\n", volume->private_volume);
3038 		kprintf("  ops:              %p\n", volume->ops);
3039 		kprintf("  file_system:      %p\n", volume->file_system);
3040 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3041 		volume = volume->super_volume;
3042 	}
3043 
3044 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3045 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3046 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3047 	set_debug_variable("_partition", (addr_t)mount->partition);
3048 }
3049 
3050 
3051 static bool
3052 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3053 	const char* name)
3054 {
3055 	bool insertSlash = buffer[bufferSize] != '\0';
3056 	size_t nameLength = strlen(name);
3057 
3058 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3059 		return false;
3060 
3061 	if (insertSlash)
3062 		buffer[--bufferSize] = '/';
3063 
3064 	bufferSize -= nameLength;
3065 	memcpy(buffer + bufferSize, name, nameLength);
3066 
3067 	return true;
3068 }
3069 
3070 
3071 static bool
3072 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3073 	ino_t nodeID)
3074 {
3075 	if (bufferSize == 0)
3076 		return false;
3077 
3078 	bool insertSlash = buffer[bufferSize] != '\0';
3079 	if (insertSlash)
3080 		buffer[--bufferSize] = '/';
3081 
3082 	size_t size = snprintf(buffer, bufferSize,
3083 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3084 	if (size > bufferSize) {
3085 		if (insertSlash)
3086 			bufferSize++;
3087 		return false;
3088 	}
3089 
3090 	if (size < bufferSize)
3091 		memmove(buffer + bufferSize - size, buffer, size);
3092 
3093 	bufferSize -= size;
3094 	return true;
3095 }
3096 
3097 
3098 static char*
3099 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3100 	bool& _truncated)
3101 {
3102 	// null-terminate the path
3103 	buffer[--bufferSize] = '\0';
3104 
3105 	while (true) {
3106 		while (vnode->covers != NULL)
3107 			vnode = vnode->covers;
3108 
3109 		if (vnode == sRoot) {
3110 			_truncated = bufferSize == 0;
3111 			if (!_truncated)
3112 				buffer[--bufferSize] = '/';
3113 			return buffer + bufferSize;
3114 		}
3115 
3116 		// resolve the name
3117 		ino_t dirID;
3118 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3119 			vnode->id, dirID);
3120 		if (name == NULL) {
3121 			// Failed to resolve the name -- prepend "<dev,node>/".
3122 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3123 				vnode->mount->id, vnode->id);
3124 			return buffer + bufferSize;
3125 		}
3126 
3127 		// prepend the name
3128 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3129 			_truncated = true;
3130 			return buffer + bufferSize;
3131 		}
3132 
3133 		// resolve the directory node
3134 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3135 		if (nextVnode == NULL) {
3136 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3137 				vnode->mount->id, dirID);
3138 			return buffer + bufferSize;
3139 		}
3140 
3141 		vnode = nextVnode;
3142 	}
3143 }
3144 
3145 
3146 static void
3147 _dump_vnode(struct vnode* vnode, bool printPath)
3148 {
3149 	kprintf("VNODE: %p\n", vnode);
3150 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3151 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3152 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3153 	kprintf(" private_node:  %p\n", vnode->private_node);
3154 	kprintf(" mount:         %p\n", vnode->mount);
3155 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3156 	kprintf(" covers:        %p\n", vnode->covers);
3157 	kprintf(" cache:         %p\n", vnode->cache);
3158 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3159 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3160 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3161 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3162 
3163 	_dump_advisory_locking(vnode->advisory_locking);
3164 
3165 	if (printPath) {
3166 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3167 		if (buffer != NULL) {
3168 			bool truncated;
3169 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3170 				B_PATH_NAME_LENGTH, truncated);
3171 			if (path != NULL) {
3172 				kprintf(" path:          ");
3173 				if (truncated)
3174 					kputs("<truncated>/");
3175 				kputs(path);
3176 				kputs("\n");
3177 			} else
3178 				kprintf("Failed to resolve vnode path.\n");
3179 
3180 			debug_free(buffer);
3181 		} else
3182 			kprintf("Failed to allocate memory for constructing the path.\n");
3183 	}
3184 
3185 	set_debug_variable("_node", (addr_t)vnode->private_node);
3186 	set_debug_variable("_mount", (addr_t)vnode->mount);
3187 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3188 	set_debug_variable("_covers", (addr_t)vnode->covers);
3189 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3190 }
3191 
3192 
3193 static int
3194 dump_mount(int argc, char** argv)
3195 {
3196 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3197 		kprintf("usage: %s [id|address]\n", argv[0]);
3198 		return 0;
3199 	}
3200 
3201 	ulong val = parse_expression(argv[1]);
3202 	uint32 id = val;
3203 
3204 	struct fs_mount* mount = sMountsTable->Lookup(id);
3205 	if (mount == NULL) {
3206 		if (IS_USER_ADDRESS(id)) {
3207 			kprintf("fs_mount not found\n");
3208 			return 0;
3209 		}
3210 		mount = (fs_mount*)val;
3211 	}
3212 
3213 	_dump_mount(mount);
3214 	return 0;
3215 }
3216 
3217 
3218 static int
3219 dump_mounts(int argc, char** argv)
3220 {
3221 	if (argc != 1) {
3222 		kprintf("usage: %s\n", argv[0]);
3223 		return 0;
3224 	}
3225 
3226 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3227 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3228 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3229 
3230 	struct fs_mount* mount;
3231 
3232 	MountTable::Iterator iterator(sMountsTable);
3233 	while (iterator.HasNext()) {
3234 		mount = iterator.Next();
3235 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3236 			mount->root_vnode->covers, mount->volume->private_volume,
3237 			mount->volume->file_system_name);
3238 
3239 		fs_volume* volume = mount->volume;
3240 		while (volume->super_volume != NULL) {
3241 			volume = volume->super_volume;
3242 			kprintf("                                     %p %s\n",
3243 				volume->private_volume, volume->file_system_name);
3244 		}
3245 	}
3246 
3247 	return 0;
3248 }
3249 
3250 
3251 static int
3252 dump_vnode(int argc, char** argv)
3253 {
3254 	bool printPath = false;
3255 	int argi = 1;
3256 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3257 		printPath = true;
3258 		argi++;
3259 	}
3260 
3261 	if (argi >= argc || argi + 2 < argc || strcmp(argv[argi], "--help") == 0) {
3262 		print_debugger_command_usage(argv[0]);
3263 		return 0;
3264 	}
3265 
3266 	struct vnode* vnode = NULL;
3267 
3268 	if (argi + 1 == argc) {
3269 		vnode = (struct vnode*)parse_expression(argv[argi]);
3270 		if (IS_USER_ADDRESS(vnode)) {
3271 			kprintf("invalid vnode address\n");
3272 			return 0;
3273 		}
3274 		_dump_vnode(vnode, printPath);
3275 		return 0;
3276 	}
3277 
3278 	dev_t device = parse_expression(argv[argi]);
3279 	ino_t id = parse_expression(argv[argi + 1]);
3280 
3281 	VnodeTable::Iterator iterator(sVnodeTable);
3282 	while (iterator.HasNext()) {
3283 		vnode = iterator.Next();
3284 		if (vnode->id != id || vnode->device != device)
3285 			continue;
3286 
3287 		_dump_vnode(vnode, printPath);
3288 	}
3289 
3290 	return 0;
3291 }
3292 
3293 
3294 static int
3295 dump_vnodes(int argc, char** argv)
3296 {
3297 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3298 		kprintf("usage: %s [device]\n", argv[0]);
3299 		return 0;
3300 	}
3301 
3302 	// restrict dumped nodes to a certain device if requested
3303 	dev_t device = parse_expression(argv[1]);
3304 
3305 	struct vnode* vnode;
3306 
3307 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3308 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3309 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3310 
3311 	VnodeTable::Iterator iterator(sVnodeTable);
3312 	while (iterator.HasNext()) {
3313 		vnode = iterator.Next();
3314 		if (vnode->device != device)
3315 			continue;
3316 
3317 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3318 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3319 			vnode->private_node, vnode->advisory_locking,
3320 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3321 			vnode->IsUnpublished() ? "u" : "-");
3322 	}
3323 
3324 	return 0;
3325 }
3326 
3327 
3328 static int
3329 dump_vnode_caches(int argc, char** argv)
3330 {
3331 	struct vnode* vnode;
3332 
3333 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3334 		kprintf("usage: %s [device]\n", argv[0]);
3335 		return 0;
3336 	}
3337 
3338 	// restrict dumped nodes to a certain device if requested
3339 	dev_t device = -1;
3340 	if (argc > 1)
3341 		device = parse_expression(argv[1]);
3342 
3343 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3344 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3345 
3346 	VnodeTable::Iterator iterator(sVnodeTable);
3347 	while (iterator.HasNext()) {
3348 		vnode = iterator.Next();
3349 		if (vnode->cache == NULL)
3350 			continue;
3351 		if (device != -1 && vnode->device != device)
3352 			continue;
3353 
3354 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3355 			vnode, vnode->device, vnode->id, vnode->cache,
3356 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3357 			vnode->cache->page_count);
3358 	}
3359 
3360 	return 0;
3361 }
3362 
3363 
3364 int
3365 dump_io_context(int argc, char** argv)
3366 {
3367 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3368 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3369 		return 0;
3370 	}
3371 
3372 	struct io_context* context = NULL;
3373 
3374 	if (argc > 1) {
3375 		ulong num = parse_expression(argv[1]);
3376 		if (IS_KERNEL_ADDRESS(num))
3377 			context = (struct io_context*)num;
3378 		else {
3379 			Team* team = team_get_team_struct_locked(num);
3380 			if (team == NULL) {
3381 				kprintf("could not find team with ID %lu\n", num);
3382 				return 0;
3383 			}
3384 			context = (struct io_context*)team->io_context;
3385 		}
3386 	} else
3387 		context = get_current_io_context(true);
3388 
3389 	kprintf("I/O CONTEXT: %p\n", context);
3390 	kprintf(" root vnode:\t%p\n", context->root);
3391 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3392 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3393 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3394 
3395 	if (context->num_used_fds) {
3396 		kprintf("   no.    %*s  ref  open  mode         pos    %*s\n",
3397 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3398 	}
3399 
3400 	for (uint32 i = 0; i < context->table_size; i++) {
3401 		struct file_descriptor* fd = context->fds[i];
3402 		if (fd == NULL)
3403 			continue;
3404 
3405 		kprintf("  %3" B_PRIu32 ":  %p  %3" B_PRId32 "  %4"
3406 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3407 			fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3408 			fd->pos, fd->cookie,
3409 			(fd_vnode(fd) != NULL) ? "vnode" : "mount",
3410 			fd->u.vnode);
3411 	}
3412 
3413 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3414 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3415 
3416 	set_debug_variable("_cwd", (addr_t)context->cwd);
3417 
3418 	return 0;
3419 }
3420 
3421 
3422 int
3423 dump_vnode_usage(int argc, char** argv)
3424 {
3425 	if (argc != 1) {
3426 		kprintf("usage: %s\n", argv[0]);
3427 		return 0;
3428 	}
3429 
3430 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3431 		sUnusedVnodes, kMaxUnusedVnodes);
3432 
3433 	uint32 count = sVnodeTable->CountElements();
3434 
3435 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3436 		count - sUnusedVnodes);
3437 	return 0;
3438 }
3439 
3440 #endif	// ADD_DEBUGGER_COMMANDS
3441 
3442 
3443 /*!	Clears memory specified by an iovec array.
3444 */
3445 static void
3446 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3447 {
3448 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3449 		size_t length = std::min(vecs[i].iov_len, bytes);
3450 		memset(vecs[i].iov_base, 0, length);
3451 		bytes -= length;
3452 	}
3453 }
3454 
3455 
3456 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3457 	and calls the file system hooks to read/write the request to disk.
3458 */
3459 static status_t
3460 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3461 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3462 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3463 	bool doWrite)
3464 {
3465 	if (fileVecCount == 0) {
3466 		// There are no file vecs at this offset, so we're obviously trying
3467 		// to access the file outside of its bounds
3468 		return B_BAD_VALUE;
3469 	}
3470 
3471 	size_t numBytes = *_numBytes;
3472 	uint32 fileVecIndex;
3473 	size_t vecOffset = *_vecOffset;
3474 	uint32 vecIndex = *_vecIndex;
3475 	status_t status;
3476 	size_t size;
3477 
3478 	if (!doWrite && vecOffset == 0) {
3479 		// now directly read the data from the device
3480 		// the first file_io_vec can be read directly
3481 		// TODO: we could also write directly
3482 
3483 		if (fileVecs[0].length < (off_t)numBytes)
3484 			size = fileVecs[0].length;
3485 		else
3486 			size = numBytes;
3487 
3488 		if (fileVecs[0].offset >= 0) {
3489 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3490 				&vecs[vecIndex], vecCount - vecIndex, &size);
3491 		} else {
3492 			// sparse read
3493 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3494 			status = B_OK;
3495 		}
3496 		if (status != B_OK)
3497 			return status;
3498 
3499 		ASSERT((off_t)size <= fileVecs[0].length);
3500 
3501 		// If the file portion was contiguous, we're already done now
3502 		if (size == numBytes)
3503 			return B_OK;
3504 
3505 		// if we reached the end of the file, we can return as well
3506 		if ((off_t)size != fileVecs[0].length) {
3507 			*_numBytes = size;
3508 			return B_OK;
3509 		}
3510 
3511 		fileVecIndex = 1;
3512 
3513 		// first, find out where we have to continue in our iovecs
3514 		for (; vecIndex < vecCount; vecIndex++) {
3515 			if (size < vecs[vecIndex].iov_len)
3516 				break;
3517 
3518 			size -= vecs[vecIndex].iov_len;
3519 		}
3520 
3521 		vecOffset = size;
3522 	} else {
3523 		fileVecIndex = 0;
3524 		size = 0;
3525 	}
3526 
3527 	// Too bad, let's process the rest of the file_io_vecs
3528 
3529 	size_t totalSize = size;
3530 	size_t bytesLeft = numBytes - size;
3531 
3532 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3533 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3534 		off_t fileOffset = fileVec.offset;
3535 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3536 
3537 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3538 			fileLeft));
3539 
3540 		// process the complete fileVec
3541 		while (fileLeft > 0) {
3542 			iovec tempVecs[MAX_TEMP_IO_VECS];
3543 			uint32 tempCount = 0;
3544 
3545 			// size tracks how much of what is left of the current fileVec
3546 			// (fileLeft) has been assigned to tempVecs
3547 			size = 0;
3548 
3549 			// assign what is left of the current fileVec to the tempVecs
3550 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3551 					&& tempCount < MAX_TEMP_IO_VECS;) {
3552 				// try to satisfy one iovec per iteration (or as much as
3553 				// possible)
3554 
3555 				// bytes left of the current iovec
3556 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3557 				if (vecLeft == 0) {
3558 					vecOffset = 0;
3559 					vecIndex++;
3560 					continue;
3561 				}
3562 
3563 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3564 					vecIndex, vecOffset, size));
3565 
3566 				// actually available bytes
3567 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3568 
3569 				tempVecs[tempCount].iov_base
3570 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3571 				tempVecs[tempCount].iov_len = tempVecSize;
3572 				tempCount++;
3573 
3574 				size += tempVecSize;
3575 				vecOffset += tempVecSize;
3576 			}
3577 
3578 			size_t bytes = size;
3579 
3580 			if (fileOffset == -1) {
3581 				if (doWrite) {
3582 					panic("sparse write attempt: vnode %p", vnode);
3583 					status = B_IO_ERROR;
3584 				} else {
3585 					// sparse read
3586 					zero_iovecs(tempVecs, tempCount, bytes);
3587 					status = B_OK;
3588 				}
3589 			} else if (doWrite) {
3590 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3591 					tempVecs, tempCount, &bytes);
3592 			} else {
3593 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3594 					tempVecs, tempCount, &bytes);
3595 			}
3596 			if (status != B_OK)
3597 				return status;
3598 
3599 			totalSize += bytes;
3600 			bytesLeft -= size;
3601 			if (fileOffset >= 0)
3602 				fileOffset += size;
3603 			fileLeft -= size;
3604 			//dprintf("-> file left = %Lu\n", fileLeft);
3605 
3606 			if (size != bytes || vecIndex >= vecCount) {
3607 				// there are no more bytes or iovecs, let's bail out
3608 				*_numBytes = totalSize;
3609 				return B_OK;
3610 			}
3611 		}
3612 	}
3613 
3614 	*_vecIndex = vecIndex;
3615 	*_vecOffset = vecOffset;
3616 	*_numBytes = totalSize;
3617 	return B_OK;
3618 }
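
// Editor's note: a minimal sketch of the input this function consumes; the
// values are made up for illustration. Each file_io_vec maps a run of file
// data to its on-disk location, and a negative offset marks a sparse extent
// that reads back as zeros (a write to one panics above):
//
//	file_io_vec fileVecs[] = {
//		{ 4096, 8192 },		// 8 KiB of data stored at disk offset 4096
//		{ -1, 4096 },		// 4 KiB sparse hole, zero-filled on read
//		{ 65536, 4096 }		// 4 KiB of data stored at disk offset 65536
//	};
//	// vecs/vecCount describe the memory buffers; *_vecIndex and *_vecOffset
//	// let a caller resume mid-buffer when it calls in again with the next
//	// batch of file vecs.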
3619 
3620 
3621 static bool
3622 is_user_in_group(gid_t gid)
3623 {
3624 	if (gid == getegid())
3625 		return true;
3626 
3627 	gid_t groups[NGROUPS_MAX];
3628 	int groupCount = getgroups(NGROUPS_MAX, groups);
3629 	for (int i = 0; i < groupCount; i++) {
3630 		if (gid == groups[i])
3631 			return true;
3632 	}
3633 
3634 	return false;
3635 }
3636 
3637 
3638 static status_t
3639 free_io_context(io_context* context)
3640 {
3641 	TIOC(FreeIOContext(context));
3642 
3643 	if (context->root)
3644 		put_vnode(context->root);
3645 
3646 	if (context->cwd)
3647 		put_vnode(context->cwd);
3648 
3649 	rw_lock_write_lock(&context->lock);
3650 
3651 	for (uint32 i = 0; i < context->table_size; i++) {
3652 		if (struct file_descriptor* descriptor = context->fds[i]) {
3653 			close_fd(context, descriptor);
3654 			put_fd(descriptor);
3655 		}
3656 	}
3657 
3658 	rw_lock_destroy(&context->lock);
3659 
3660 	remove_node_monitors(context);
3661 	free(context->fds);
3662 	free(context);
3663 
3664 	return B_OK;
3665 }
3666 
3667 
3668 static status_t
3669 resize_monitor_table(struct io_context* context, const int newSize)
3670 {
3671 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3672 		return B_BAD_VALUE;
3673 
3674 	WriteLocker locker(context->lock);
3675 
3676 	if ((size_t)newSize < context->num_monitors)
3677 		return B_BUSY;
3678 
3679 	context->max_monitors = newSize;
3680 	return B_OK;
3681 }
3682 
3683 
3684 //	#pragma mark - public API for file systems
3685 
3686 
3687 extern "C" status_t
3688 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3689 	fs_vnode_ops* ops)
3690 {
3691 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3692 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3693 
3694 	if (privateNode == NULL)
3695 		return B_BAD_VALUE;
3696 
3697 	int32 tries = BUSY_VNODE_RETRIES;
3698 restart:
3699 	// create the node
3700 	bool nodeCreated;
3701 	struct vnode* vnode;
3702 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3703 		nodeCreated);
3704 	if (status != B_OK)
3705 		return status;
3706 
3707 	WriteLocker nodeLocker(sVnodeLock, true);
3708 		// create_new_vnode_and_lock() has locked for us
3709 
3710 	if (!nodeCreated && vnode->IsBusy()) {
3711 		nodeLocker.Unlock();
3712 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3713 			return B_BUSY;
3714 		goto restart;
3715 	}
3716 
3717 	// file system integrity check:
3718 	// test if the vnode already exists and bail out if this is the case!
3719 	if (!nodeCreated) {
3720 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3721 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3722 			vnode->private_node);
3723 		return B_ERROR;
3724 	}
3725 
3726 	vnode->private_node = privateNode;
3727 	vnode->ops = ops;
3728 	vnode->SetUnpublished(true);
3729 
3730 	TRACE(("returns: %s\n", strerror(status)));
3731 
3732 	return status;
3733 }
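
// Editor's note: a hedged usage sketch, not from this file. A file system
// typically creates the vnode unpublished, attaches its private data, and
// then makes it visible via publish_vnode() below; "myVolume", "inode" and
// "sInodeOps" are hypothetical names:
//
//	status_t status = new_vnode(myVolume, inode->ID(), inode, &sInodeOps);
//	if (status == B_OK) {
//		status = publish_vnode(myVolume, inode->ID(), inode, &sInodeOps,
//			S_IFREG, 0);
//	}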
3734 
3735 
3736 extern "C" status_t
3737 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3738 	fs_vnode_ops* ops, int type, uint32 flags)
3739 {
3740 	FUNCTION(("publish_vnode()\n"));
3741 
3742 	int32 tries = BUSY_VNODE_RETRIES;
3743 restart:
3744 	WriteLocker locker(sVnodeLock);
3745 
3746 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3747 
3748 	bool nodeCreated = false;
3749 	if (vnode == NULL) {
3750 		if (privateNode == NULL)
3751 			return B_BAD_VALUE;
3752 
3753 		// create the node
3754 		locker.Unlock();
3755 			// create_new_vnode_and_lock() will re-lock for us on success
3756 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3757 			nodeCreated);
3758 		if (status != B_OK)
3759 			return status;
3760 
3761 		locker.SetTo(sVnodeLock, true);
3762 	}
3763 
3764 	if (nodeCreated) {
3765 		vnode->private_node = privateNode;
3766 		vnode->ops = ops;
3767 		vnode->SetUnpublished(true);
3768 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3769 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3770 		// already known, but not published
3771 	} else if (vnode->IsBusy()) {
3772 		locker.Unlock();
3773 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3774 			return B_BUSY;
3775 		goto restart;
3776 	} else
3777 		return B_BAD_VALUE;
3778 
3779 	bool publishSpecialSubNode = false;
3780 
3781 	vnode->SetType(type);
3782 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3783 	publishSpecialSubNode = is_special_node_type(type)
3784 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3785 
3786 	status_t status = B_OK;
3787 
3788 	// create sub vnodes, if necessary
3789 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3790 		locker.Unlock();
3791 
3792 		fs_volume* subVolume = volume;
3793 		if (volume->sub_volume != NULL) {
3794 			while (status == B_OK && subVolume->sub_volume != NULL) {
3795 				subVolume = subVolume->sub_volume;
3796 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3797 					vnode);
3798 			}
3799 		}
3800 
3801 		if (status == B_OK && publishSpecialSubNode)
3802 			status = create_special_sub_node(vnode, flags);
3803 
3804 		if (status != B_OK) {
3805 			// error -- clean up the created sub vnodes
3806 			while (subVolume->super_volume != volume) {
3807 				subVolume = subVolume->super_volume;
3808 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3809 			}
3810 		}
3811 
3812 		if (status == B_OK) {
3813 			ReadLocker vnodesReadLocker(sVnodeLock);
3814 			AutoLocker<Vnode> nodeLocker(vnode);
3815 			vnode->SetBusy(false);
3816 			vnode->SetUnpublished(false);
3817 		} else {
3818 			locker.Lock();
3819 			sVnodeTable->Remove(vnode);
3820 			remove_vnode_from_mount_list(vnode, vnode->mount);
3821 			object_cache_free(sVnodeCache, vnode, 0);
3822 		}
3823 	} else {
3824 		// we still hold the write lock -- mark the node unbusy and published
3825 		vnode->SetBusy(false);
3826 		vnode->SetUnpublished(false);
3827 	}
3828 
3829 	TRACE(("returns: %s\n", strerror(status)));
3830 
3831 	return status;
3832 }
3833 
3834 
3835 extern "C" status_t
3836 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3837 {
3838 	struct vnode* vnode;
3839 
3840 	if (volume == NULL)
3841 		return B_BAD_VALUE;
3842 
3843 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3844 	if (status != B_OK)
3845 		return status;
3846 
3847 	// If this is a layered FS, we need to get the node cookie for the requested
3848 	// layer.
3849 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3850 		fs_vnode resolvedNode;
3851 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3852 			&resolvedNode);
3853 		if (status != B_OK) {
3854 			panic("get_vnode(): Failed to get super node for vnode %p, "
3855 				"volume: %p", vnode, volume);
3856 			put_vnode(vnode);
3857 			return status;
3858 		}
3859 
3860 		if (_privateNode != NULL)
3861 			*_privateNode = resolvedNode.private_node;
3862 	} else if (_privateNode != NULL)
3863 		*_privateNode = vnode->private_node;
3864 
3865 	return B_OK;
3866 }
3867 
3868 
3869 extern "C" status_t
3870 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3871 {
3872 	ReadLocker nodeLocker(sVnodeLock);
3873 
3874 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3875 	if (vnode == NULL)
3876 		return B_BAD_VALUE;
3877 
3878 	inc_vnode_ref_count(vnode);
3879 	return B_OK;
3880 }
3881 
3882 
3883 extern "C" status_t
3884 put_vnode(fs_volume* volume, ino_t vnodeID)
3885 {
3886 	struct vnode* vnode;
3887 
3888 	rw_lock_read_lock(&sVnodeLock);
3889 	vnode = lookup_vnode(volume->id, vnodeID);
3890 	rw_lock_read_unlock(&sVnodeLock);
3891 
3892 	if (vnode == NULL)
3893 		return B_BAD_VALUE;
3894 
3895 	dec_vnode_ref_count(vnode, false, true);
3896 	return B_OK;
3897 }
3898 
3899 
3900 extern "C" status_t
3901 remove_vnode(fs_volume* volume, ino_t vnodeID)
3902 {
3903 	ReadLocker locker(sVnodeLock);
3904 
3905 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3906 	if (vnode == NULL)
3907 		return B_ENTRY_NOT_FOUND;
3908 
3909 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3910 		// this vnode is in use
3911 		return B_BUSY;
3912 	}
3913 
3914 	vnode->Lock();
3915 
3916 	vnode->SetRemoved(true);
3917 	bool removeUnpublished = false;
3918 
3919 	if (vnode->IsUnpublished()) {
3920 		// prepare the vnode for deletion
3921 		removeUnpublished = true;
3922 		vnode->SetBusy(true);
3923 	}
3924 
3925 	vnode->Unlock();
3926 	locker.Unlock();
3927 
3928 	if (removeUnpublished) {
3929 		// If the vnode hasn't been published yet, we delete it here
3930 		atomic_add(&vnode->ref_count, -1);
3931 		free_vnode(vnode, true);
3932 	}
3933 
3934 	return B_OK;
3935 }
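
// Editor's note: remove_vnode() only marks the node for deletion; unless it
// was still unpublished, the FS's remove hook runs later, when the last
// reference is put. A hedged sketch of an unlink-style hook (the directory
// helper is hypothetical):
//
//	status_t status = my_remove_entry(directory, name);	// FS-specific
//	if (status == B_OK)
//		status = remove_vnode(volume, inodeID);
//	// fs_vnode_ops::remove_vnode() fires once the last ref goes away.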
3936 
3937 
3938 extern "C" status_t
3939 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3940 {
3941 	struct vnode* vnode;
3942 
3943 	rw_lock_read_lock(&sVnodeLock);
3944 
3945 	vnode = lookup_vnode(volume->id, vnodeID);
3946 	if (vnode) {
3947 		AutoLocker<Vnode> nodeLocker(vnode);
3948 		vnode->SetRemoved(false);
3949 	}
3950 
3951 	rw_lock_read_unlock(&sVnodeLock);
3952 	return B_OK;
3953 }
3954 
3955 
3956 extern "C" status_t
3957 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3958 {
3959 	ReadLocker _(sVnodeLock);
3960 
3961 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3962 		if (_removed != NULL)
3963 			*_removed = vnode->IsRemoved();
3964 		return B_OK;
3965 	}
3966 
3967 	return B_BAD_VALUE;
3968 }
3969 
3970 
3971 extern "C" fs_volume*
3972 volume_for_vnode(fs_vnode* _vnode)
3973 {
3974 	if (_vnode == NULL)
3975 		return NULL;
3976 
3977 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3978 	return vnode->mount->volume;
3979 }
3980 
3981 
3982 extern "C" status_t
3983 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3984 	uid_t nodeUserID)
3985 {
3986 	// get node permissions
3987 	int userPermissions = (mode & S_IRWXU) >> 6;
3988 	int groupPermissions = (mode & S_IRWXG) >> 3;
3989 	int otherPermissions = mode & S_IRWXO;
3990 
3991 	// get the node permissions for this uid/gid
3992 	int permissions = 0;
3993 	uid_t uid = geteuid();
3994 
3995 	if (uid == 0) {
3996 		// user is root
3997 		// root always has read/write permission, but at least one of the
3998 		// X bits must be set for execute permission
3999 		permissions = userPermissions | groupPermissions | otherPermissions
4000 			| S_IROTH | S_IWOTH;
4001 		if (S_ISDIR(mode))
4002 			permissions |= S_IXOTH;
4003 	} else if (uid == nodeUserID) {
4004 		// user is node owner
4005 		permissions = userPermissions;
4006 	} else if (is_user_in_group(nodeGroupID)) {
4007 		// user is in owning group
4008 		permissions = groupPermissions;
4009 	} else {
4010 		// user is one of the others
4011 		permissions = otherPermissions;
4012 	}
4013 
4014 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4015 }
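
// Editor's note: a worked example of the bit math above, with made-up IDs.
// For mode 0640 (rw-r-----) the per-class permissions are 06 (owner),
// 04 (group) and 0 (other):
//
//	check_access_permissions(R_OK, 0640, staffGID, ownerUID);
//		// B_OK for a member of staffGID: (R_OK & ~04) == 0
//	check_access_permissions(W_OK, 0640, staffGID, ownerUID);
//		// B_PERMISSION_DENIED for that member: (W_OK & ~04) != 0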
4016 
4017 
4018 #if 0
4019 extern "C" status_t
4020 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4021 	size_t* _numBytes)
4022 {
4023 	struct file_descriptor* descriptor;
4024 	struct vnode* vnode;
4025 
4026 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4027 	if (descriptor == NULL)
4028 		return B_FILE_ERROR;
4029 
4030 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4031 		count, 0, _numBytes);
4032 
4033 	put_fd(descriptor);
4034 	return status;
4035 }
4036 
4037 
4038 extern "C" status_t
4039 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4040 	size_t* _numBytes)
4041 {
4042 	struct file_descriptor* descriptor;
4043 	struct vnode* vnode;
4044 
4045 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4046 	if (descriptor == NULL)
4047 		return B_FILE_ERROR;
4048 
4049 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4050 		count, 0, _numBytes);
4051 
4052 	put_fd(descriptor);
4053 	return status;
4054 }
4055 #endif
4056 
4057 
4058 extern "C" status_t
4059 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4060 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4061 	size_t* _bytes)
4062 {
4063 	struct vnode* vnode;
4064 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4065 	if (!descriptor.IsSet())
4066 		return B_FILE_ERROR;
4067 
4068 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4069 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4070 		false);
4071 
4072 	return status;
4073 }
4074 
4075 
4076 extern "C" status_t
4077 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4078 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4079 	size_t* _bytes)
4080 {
4081 	struct vnode* vnode;
4082 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4083 	if (!descriptor.IsSet())
4084 		return B_FILE_ERROR;
4085 
4086 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4087 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4088 		true);
4089 
4090 	return status;
4091 }
4092 
4093 
4094 extern "C" status_t
4095 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4096 {
4097 	// lookup mount -- the caller is required to make sure that the mount
4098 	// won't go away
4099 	ReadLocker locker(sMountLock);
4100 	struct fs_mount* mount = find_mount(mountID);
4101 	if (mount == NULL)
4102 		return B_BAD_VALUE;
4103 	locker.Unlock();
4104 
4105 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4106 }
4107 
4108 
4109 extern "C" status_t
4110 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4111 {
4112 	// lookup mount -- the caller is required to make sure that the mount
4113 	// won't go away
4114 	ReadLocker locker(sMountLock);
4115 	struct fs_mount* mount = find_mount(mountID);
4116 	if (mount == NULL)
4117 		return B_BAD_VALUE;
4118 	locker.Unlock();
4119 
4120 	return mount->entry_cache.Add(dirID, name, -1, true);
4121 }
4122 
4123 
4124 extern "C" status_t
4125 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4126 {
4127 	// lookup mount -- the caller is required to make sure that the mount
4128 	// won't go away
4129 	ReadLocker locker(sMountLock);
4130 	struct fs_mount* mount = find_mount(mountID);
4131 	if (mount == NULL)
4132 		return B_BAD_VALUE;
4133 	locker.Unlock();
4134 
4135 	return mount->entry_cache.Remove(dirID, name);
4136 }
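
// Editor's note: a hedged sketch of how a file system might use the three
// entry cache hooks above ("volume", "dirID" and "nodeID" are assumed to
// come from a lookup). Negative entries let repeated misses skip the disk:
//
//	entry_cache_add(volume->id, dirID, "config", nodeID);		// positive
//	entry_cache_add_missing(volume->id, dirID, "no-such-file");	// negative
//	...
//	entry_cache_remove(volume->id, dirID, "config");	// e.g. after unlink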
4137 
4138 
4139 //	#pragma mark - private VFS API
4140 //	Functions the VFS exports for other parts of the kernel
4141 
4142 
4143 /*! Acquires another reference to the vnode; the reference has to be
4144 	released by calling vfs_put_vnode().
4145 */
4146 void
4147 vfs_acquire_vnode(struct vnode* vnode)
4148 {
4149 	inc_vnode_ref_count(vnode);
4150 }
4151 
4152 
4153 /*! This is currently called from file_cache_create() only.
4154 	It's probably a temporary solution as long as devfs requires that
4155 	fs_read_pages()/fs_write_pages() are called with the standard
4156 	open cookie and not with a device cookie.
4157 	If that's done differently, remove this call; it has no other
4158 	purpose.
4159 */
4160 extern "C" status_t
4161 vfs_get_cookie_from_fd(int fd, void** _cookie)
4162 {
4163 	struct file_descriptor* descriptor;
4164 
4165 	descriptor = get_fd(get_current_io_context(true), fd);
4166 	if (descriptor == NULL)
4167 		return B_FILE_ERROR;
4168 
4169 	*_cookie = descriptor->cookie;
4170 	return B_OK;
4171 }
4172 
4173 
4174 extern "C" status_t
4175 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4176 {
4177 	*vnode = get_vnode_from_fd(fd, kernel);
4178 
4179 	if (*vnode == NULL)
4180 		return B_FILE_ERROR;
4181 
4182 	return B_NO_ERROR;
4183 }
4184 
4185 
4186 extern "C" status_t
4187 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4188 {
4189 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4190 		path, kernel));
4191 
4192 	KPath pathBuffer;
4193 	if (pathBuffer.InitCheck() != B_OK)
4194 		return B_NO_MEMORY;
4195 
4196 	char* buffer = pathBuffer.LockBuffer();
4197 	strlcpy(buffer, path, pathBuffer.BufferSize());
4198 
4199 	VnodePutter vnode;
4200 	status_t status = path_to_vnode(buffer, true, vnode, NULL, kernel);
4201 	if (status != B_OK)
4202 		return status;
4203 
4204 	*_vnode = vnode.Detach();
4205 	return B_OK;
4206 }
4207 
4208 
4209 extern "C" status_t
4210 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4211 {
4212 	struct vnode* vnode = NULL;
4213 
4214 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4215 	if (status != B_OK)
4216 		return status;
4217 
4218 	*_vnode = vnode;
4219 	return B_OK;
4220 }
4221 
4222 
4223 extern "C" status_t
4224 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4225 	const char* name, struct vnode** _vnode)
4226 {
4227 	VnodePutter vnode;
4228 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, false, true, vnode);
4229 	*_vnode = vnode.Detach();
4230 	return status;
4231 }
4232 
4233 
4234 extern "C" void
4235 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4236 {
4237 	*_mountID = vnode->device;
4238 	*_vnodeID = vnode->id;
4239 }
4240 
4241 
4242 /*!
4243 	Helper function abstracting the process of "converting" a given
4244 	vnode-pointer to a fs_vnode-pointer.
4245 	Currently only used in bindfs.
4246 */
4247 extern "C" fs_vnode*
4248 vfs_fsnode_for_vnode(struct vnode* vnode)
4249 {
4250 	return vnode;
4251 }
4252 
4253 
4254 /*!
4255 	Calls fs_open() on the given vnode and returns a new
4256 	file descriptor for it
4257 */
4258 int
4259 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4260 {
4261 	return open_vnode(vnode, openMode, kernel);
4262 }
4263 
4264 
4265 /*!	Looks up a vnode with the given mount and vnode ID.
4266 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4267 	to the node.
4268 	It's currently only used by file_cache_create().
4269 */
4270 extern "C" status_t
4271 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4272 {
4273 	rw_lock_read_lock(&sVnodeLock);
4274 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4275 	rw_lock_read_unlock(&sVnodeLock);
4276 
4277 	if (vnode == NULL)
4278 		return B_ERROR;
4279 
4280 	*_vnode = vnode;
4281 	return B_OK;
4282 }
4283 
4284 
4285 extern "C" status_t
4286 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4287 	bool traverseLeafLink, bool kernel, void** _node)
4288 {
4289 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4290 		volume, path, kernel));
4291 
4292 	KPath pathBuffer;
4293 	if (pathBuffer.InitCheck() != B_OK)
4294 		return B_NO_MEMORY;
4295 
4296 	fs_mount* mount;
4297 	status_t status = get_mount(volume->id, &mount);
4298 	if (status != B_OK)
4299 		return status;
4300 
4301 	char* buffer = pathBuffer.LockBuffer();
4302 	strlcpy(buffer, path, pathBuffer.BufferSize());
4303 
4304 	VnodePutter vnode;
4305 
4306 	if (buffer[0] == '/')
4307 		status = path_to_vnode(buffer, traverseLeafLink, vnode, NULL, kernel);
4308 	else {
4309 		inc_vnode_ref_count(mount->root_vnode);
4310 			// vnode_path_to_vnode() releases a reference to the starting vnode
4311 		status = vnode_path_to_vnode(mount->root_vnode, buffer, traverseLeafLink,
4312 			kernel, vnode, NULL);
4313 	}
4314 
4315 	put_mount(mount);
4316 
4317 	if (status != B_OK)
4318 		return status;
4319 
4320 	if (vnode->device != volume->id) {
4321 		// wrong mount ID - must not gain access on foreign file system nodes
4322 		return B_BAD_VALUE;
4323 	}
4324 
4325 	// Use get_vnode() to resolve the cookie for the right layer.
4326 	status = get_vnode(volume, vnode->id, _node);
4327 
4328 	return status;
4329 }
4330 
4331 
4332 status_t
4333 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4334 	struct stat* stat, bool kernel)
4335 {
4336 	status_t status;
4337 
4338 	if (path != NULL) {
4339 		// path given: get the stat of the node referred to by (fd, path)
4340 		KPath pathBuffer(path);
4341 		if (pathBuffer.InitCheck() != B_OK)
4342 			return B_NO_MEMORY;
4343 
4344 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4345 			traverseLeafLink, stat, kernel);
4346 	} else {
4347 		// no path given: get the FD and use the FD operation
4348 		FileDescriptorPutter descriptor
4349 			(get_fd(get_current_io_context(kernel), fd));
4350 		if (!descriptor.IsSet())
4351 			return B_FILE_ERROR;
4352 
4353 		if (descriptor->ops->fd_read_stat)
4354 			status = descriptor->ops->fd_read_stat(descriptor.Get(), stat);
4355 		else
4356 			status = B_UNSUPPORTED;
4357 	}
4358 
4359 	return status;
4360 }
4361 
4362 
4363 /*!	Finds the full path to the file that contains the module \a moduleName,
4364 	puts it into \a pathBuffer, and returns B_OK for success.
4365 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4366 	\c B_ENTRY_NOT_FOUND if no file could be found.
4367 	\a pathBuffer is clobbered in any case and must not be relied on if this
4368 	function returns unsuccessfully.
4369 	\a basePath and \a pathBuffer must not point to the same space.
4370 */
4371 status_t
4372 vfs_get_module_path(const char* basePath, const char* moduleName,
4373 	char* pathBuffer, size_t bufferSize)
4374 {
4375 	status_t status;
4376 	size_t length;
4377 	char* path;
4378 
4379 	if (bufferSize == 0
4380 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4381 		return B_BUFFER_OVERFLOW;
4382 
4383 	VnodePutter dir;
4384 	status = path_to_vnode(pathBuffer, true, dir, NULL, true);
4385 	if (status != B_OK)
4386 		return status;
4387 
4388 	// the path buffer had been clobbered by the above call
4389 	length = strlcpy(pathBuffer, basePath, bufferSize);
4390 	if (pathBuffer[length - 1] != '/')
4391 		pathBuffer[length++] = '/';
4392 
4393 	path = pathBuffer + length;
4394 	bufferSize -= length;
4395 
4396 	VnodePutter file;
4397 	while (moduleName) {
4398 		char* nextPath = strchr(moduleName, '/');
4399 		if (nextPath == NULL)
4400 			length = strlen(moduleName);
4401 		else {
4402 			length = nextPath - moduleName;
4403 			nextPath++;
4404 		}
4405 
4406 		if (length + 1 >= bufferSize)
4407 			return B_BUFFER_OVERFLOW;
4408 
4409 		memcpy(path, moduleName, length);
4410 		path[length] = '\0';
4411 		moduleName = nextPath;
4412 
4413 		// vnode_path_to_vnode() assumes ownership of the passed dir
4414 		status = vnode_path_to_vnode(dir.Detach(), path, true, true, file, NULL);
4415 		if (status != B_OK)
4416 			return status;
4417 
4418 		if (S_ISDIR(file->Type())) {
4419 			// goto the next directory
4420 			path[length] = '/';
4421 			path[length + 1] = '\0';
4422 			path += length + 1;
4423 			bufferSize -= length + 1;
4424 
4425 			dir.SetTo(file.Detach());
4426 		} else if (S_ISREG(file->Type())) {
4427 			// it's a file so it should be what we've searched for
4428 			return B_OK;
4429 		} else {
4430 			TRACE(("vfs_get_module_path(): something is strange here: "
4431 				"0x%08" B_PRIx32 "...\n", file->Type()));
4432 			return B_ERROR;
4433 		}
4434 	}
4435 
4436 	// if we got here, the moduleName just pointed to a directory, not to
4437 	// a real module - what should we do in this case?
4438 	return B_ENTRY_NOT_FOUND;
4439 }
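
// Editor's note: an illustration of the loop above with made-up paths. For
// basePath "/boot/system/add-ons/kernel" and moduleName "bus_managers/pci/v1",
// the function appends and resolves "bus_managers", then "pci"; whichever
// component turns out to be a regular file is returned as the module file,
// while directories make it descend one level further.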
4440 
4441 
4442 /*!	\brief Normalizes a given path.
4443 
4444 	The path must refer to an existing or non-existing entry in an existing
4445 	directory; that is, after chopping off the leaf component, the remaining
4446 	path must refer to an existing directory.
4447 
4448 	The returned path will be canonical in that it will be absolute, will not
4449 	contain any "." or ".." components or duplicate occurrences of '/'s,
4450 	and none of the directory components will be symbolic links.
4451 
4452 	Any two paths referring to the same entry will result in the same
4453 	normalized path (well, that is pretty much the definition of `normalized',
4454 	isn't it :-).
4455 
4456 	\param path The path to be normalized.
4457 	\param buffer The buffer into which the normalized path will be written.
4458 		   May be the same one as \a path.
4459 	\param bufferSize The size of \a buffer.
4460 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4461 	\param kernel \c true, if the IO context of the kernel shall be used,
4462 		   otherwise that of the team this thread belongs to. Only relevant,
4463 		   if the path is relative (to get the CWD).
4464 	\return \c B_OK if everything went fine, another error code otherwise.
4465 */
4466 status_t
4467 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4468 	bool traverseLink, bool kernel)
4469 {
4470 	if (!path || !buffer || bufferSize < 1)
4471 		return B_BAD_VALUE;
4472 
4473 	if (path != buffer) {
4474 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4475 			return B_BUFFER_OVERFLOW;
4476 	}
4477 
4478 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4479 }
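
// Editor's note: an example of the contract documented above, with made-up
// paths. "." and ".." components, duplicate slashes and directory symlinks
// are all resolved:
//
//	char buffer[B_PATH_NAME_LENGTH];
//	status_t status = vfs_normalize_path("/boot//system/./lib", buffer,
//		sizeof(buffer), true, true);
//	// on success, buffer contains "/boot/system/lib"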
4480 
4481 
4482 /*!	\brief Gets the parent of the passed in node.
4483 
4484 	Gets the parent of the passed in node, and correctly resolves covered
4485 	nodes.
4486 */
4487 extern "C" status_t
4488 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4489 {
4490 	return resolve_covered_parent(parent, device, node,
4491 		get_current_io_context(true));
4492 }
4493 
4494 
4495 /*!	\brief Creates a special node in the file system.
4496 
4497 	The caller gets a reference to the newly created node (which is passed
4498 	back through \a _createdVnode) and is responsible for releasing it.
4499 
4500 	\param path The path where to create the entry for the node. Can be \c NULL,
4501 		in which case the node is created without an entry in the root FS -- it
4502 		will automatically be deleted when the last reference has been released.
4503 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4504 		the target file system will just create the node with its standard
4505 		operations. Depending on the type of the node a subnode might be created
4506 		automatically, though.
4507 	\param mode The type and permissions for the node to be created.
4508 	\param flags Flags to be passed to the creating FS.
4509 	\param kernel \c true, if called in the kernel context (relevant only if
4510 		\a path is not \c NULL and not absolute).
4511 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4512 		file system creating the node, with the private data pointer and
4513 		operations for the super node. Can be \c NULL.
4514 	\param _createdVnode Pointer to pre-allocated storage where to store the
4515 		pointer to the newly created node.
4516 	\return \c B_OK, if everything went fine, another error code otherwise.
4517 */
4518 status_t
4519 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4520 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4521 	struct vnode** _createdVnode)
4522 {
4523 	VnodePutter dirNode;
4524 	char _leaf[B_FILE_NAME_LENGTH];
4525 	char* leaf = NULL;
4526 
4527 	if (path) {
4528 		// We've got a path. Get the dir vnode and the leaf name.
4529 		KPath tmpPathBuffer;
4530 		if (tmpPathBuffer.InitCheck() != B_OK)
4531 			return B_NO_MEMORY;
4532 
4533 		char* tmpPath = tmpPathBuffer.LockBuffer();
4534 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4535 			return B_NAME_TOO_LONG;
4536 
4537 		// get the dir vnode and the leaf name
4538 		leaf = _leaf;
4539 		status_t error = path_to_dir_vnode(tmpPath, dirNode, leaf, kernel);
4540 		if (error != B_OK)
4541 			return error;
4542 	} else {
4543 		// No path. Create the node in the root FS.
4544 		dirNode.SetTo(sRoot);
4545 		inc_vnode_ref_count(dirNode.Get());
4546 	}
4547 
4548 	// check support for creating special nodes
4549 	if (!HAS_FS_CALL(dirNode, create_special_node))
4550 		return B_UNSUPPORTED;
4551 
4552 	// create the node
4553 	fs_vnode superVnode;
4554 	ino_t nodeID;
4555 	status_t status = FS_CALL(dirNode.Get(), create_special_node, leaf, subVnode,
4556 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4557 	if (status != B_OK)
4558 		return status;
4559 
4560 	// lookup the node
4561 	rw_lock_read_lock(&sVnodeLock);
4562 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4563 	rw_lock_read_unlock(&sVnodeLock);
4564 
4565 	if (*_createdVnode == NULL) {
4566 		panic("vfs_create_special_node(): lookup of node failed");
4567 		return B_ERROR;
4568 	}
4569 
4570 	return B_OK;
4571 }
4572 
4573 
4574 extern "C" void
4575 vfs_put_vnode(struct vnode* vnode)
4576 {
4577 	put_vnode(vnode);
4578 }
4579 
4580 
4581 extern "C" status_t
4582 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4583 {
4584 	// Get current working directory from io context
4585 	const struct io_context* context = get_current_io_context(false);
4586 
4587 	ReadLocker locker(context->lock);
4588 	if (context->cwd == NULL)
4589 		return B_ERROR;
4590 
4591 	*_mountID = context->cwd->device;
4592 	*_vnodeID = context->cwd->id;
4593 	return B_OK;
4594 }
4595 
4596 
4597 status_t
4598 vfs_unmount(dev_t mountID, uint32 flags)
4599 {
4600 	return fs_unmount(NULL, mountID, flags, true);
4601 }
4602 
4603 
4604 extern "C" status_t
4605 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4606 {
4607 	struct vnode* vnode;
4608 
4609 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4610 	if (status != B_OK)
4611 		return status;
4612 
4613 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4614 	put_vnode(vnode);
4615 	return B_OK;
4616 }
4617 
4618 
4619 extern "C" void
4620 vfs_free_unused_vnodes(int32 level)
4621 {
4622 	vnode_low_resource_handler(NULL,
4623 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4624 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4625 		level);
4626 }
4627 
4628 
4629 extern "C" bool
4630 vfs_can_page(struct vnode* vnode, void* cookie)
4631 {
4632 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4633 
4634 	if (HAS_FS_CALL(vnode, can_page))
4635 		return FS_CALL(vnode, can_page, cookie);
4636 	return false;
4637 }
4638 
4639 
4640 extern "C" status_t
4641 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4642 	const generic_io_vec* vecs, size_t count, uint32 flags,
4643 	generic_size_t* _numBytes)
4644 {
4645 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4646 		vecs, pos));
4647 
4648 #if VFS_PAGES_IO_TRACING
4649 	generic_size_t bytesRequested = *_numBytes;
4650 #endif
4651 
4652 	IORequest request;
4653 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4654 	if (status == B_OK) {
4655 		status = vfs_vnode_io(vnode, cookie, &request);
4656 		if (status == B_OK)
4657 			status = request.Wait();
4658 		*_numBytes = request.TransferredBytes();
4659 	}
4660 
4661 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4662 		status, *_numBytes));
4663 
4664 	return status;
4665 }
4666 
4667 
4668 extern "C" status_t
4669 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4670 	const generic_io_vec* vecs, size_t count, uint32 flags,
4671 	generic_size_t* _numBytes)
4672 {
4673 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4674 		vecs, pos));
4675 
4676 #if VFS_PAGES_IO_TRACING
4677 	generic_size_t bytesRequested = *_numBytes;
4678 #endif
4679 
4680 	IORequest request;
4681 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4682 	if (status == B_OK) {
4683 		status = vfs_vnode_io(vnode, cookie, &request);
4684 		if (status == B_OK)
4685 			status = request.Wait();
4686 		*_numBytes = request.TransferredBytes();
4687 	}
4688 
4689 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4690 		status, *_numBytes));
4691 
4692 	return status;
4693 }
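
// Editor's note: a hedged sketch of driving the two page-I/O paths above.
// Both wrap the vecs in an IORequest and block until it completes; "cookie"
// is assumed to be the FS open cookie for the vnode, and the physical
// address is made up:
//
//	generic_io_vec vec = { physicalAddress, B_PAGE_SIZE };
//	generic_size_t transferred = B_PAGE_SIZE;
//	status_t status = vfs_read_pages(vnode, cookie, 0, &vec, 1,
//		B_PHYSICAL_IO_REQUEST, &transferred);
//	// on success, transferred holds the number of bytes actually read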
4694 
4695 
4696 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, one will be
4697 	created if \a allocate is \c true.
4698 	In case it's successful, it will also grab a reference to the cache
4699 	it returns.
4700 */
4701 extern "C" status_t
4702 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4703 {
4704 	if (vnode->cache != NULL) {
4705 		vnode->cache->AcquireRef();
4706 		*_cache = vnode->cache;
4707 		return B_OK;
4708 	}
4709 
4710 	rw_lock_read_lock(&sVnodeLock);
4711 	vnode->Lock();
4712 
4713 	status_t status = B_OK;
4714 
4715 	// The cache could have been created in the meantime
4716 	if (vnode->cache == NULL) {
4717 		if (allocate) {
4718 			// TODO: actually the vnode needs to be busy already here, or
4719 			//	else this won't work...
4720 			bool wasBusy = vnode->IsBusy();
4721 			vnode->SetBusy(true);
4722 
4723 			vnode->Unlock();
4724 			rw_lock_read_unlock(&sVnodeLock);
4725 
4726 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4727 
4728 			rw_lock_read_lock(&sVnodeLock);
4729 			vnode->Lock();
4730 			vnode->SetBusy(wasBusy);
4731 		} else
4732 			status = B_BAD_VALUE;
4733 	}
4734 
4735 	vnode->Unlock();
4736 	rw_lock_read_unlock(&sVnodeLock);
4737 
4738 	if (status == B_OK) {
4739 		vnode->cache->AcquireRef();
4740 		*_cache = vnode->cache;
4741 	}
4742 
4743 	return status;
4744 }
4745 
4746 
4747 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4748 	their own.
4749 	In case it's successful, it will also grab a reference to the given
4750 	cache.
4751 */
4752 extern "C" status_t
4753 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4754 {
4755 	rw_lock_read_lock(&sVnodeLock);
4756 	vnode->Lock();
4757 
4758 	status_t status = B_OK;
4759 	if (vnode->cache != NULL) {
4760 		status = B_NOT_ALLOWED;
4761 	} else {
4762 		vnode->cache = _cache;
4763 		_cache->AcquireRef();
4764 	}
4765 
4766 	vnode->Unlock();
4767 	rw_lock_read_unlock(&sVnodeLock);
4768 	return status;
4769 }
4770 
4771 
4772 status_t
4773 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4774 	file_io_vec* vecs, size_t* _count)
4775 {
4776 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4777 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4778 
4779 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4780 }
4781 
4782 
4783 status_t
4784 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4785 {
4786 	status_t status = FS_CALL(vnode, read_stat, stat);
4787 
4788 	// fill in the st_dev and st_ino fields
4789 	if (status == B_OK) {
4790 		stat->st_dev = vnode->device;
4791 		stat->st_ino = vnode->id;
4792 		// the rdev field must stay unset for non-special files
4793 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4794 			stat->st_rdev = -1;
4795 	}
4796 
4797 	return status;
4798 }
4799 
4800 
4801 status_t
4802 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4803 {
4804 	struct vnode* vnode;
4805 	status_t status = get_vnode(device, inode, &vnode, true, false);
4806 	if (status != B_OK)
4807 		return status;
4808 
4809 	status = vfs_stat_vnode(vnode, stat);
4810 
4811 	put_vnode(vnode);
4812 	return status;
4813 }
4814 
4815 
4816 status_t
4817 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4818 {
4819 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4820 }
4821 
4822 
4823 status_t
4824 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4825 	bool kernel, char* path, size_t pathLength)
4826 {
4827 	VnodePutter vnode;
4828 	status_t status;
4829 
4830 	// filter invalid leaf names
4831 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4832 		return B_BAD_VALUE;
4833 
4834 	// get the vnode matching the dir's node_ref
4835 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4836 		// special cases "." and "..": we can directly get the vnode of the
4837 		// referenced directory
4838 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, vnode);
4839 		leaf = NULL;
4840 	} else {
4841 		struct vnode* temp = NULL;
4842 		status = get_vnode(device, inode, &temp, true, false);
4843 		vnode.SetTo(temp);
4844 	}
4845 	if (status != B_OK)
4846 		return status;
4847 
4848 	// get the directory path
4849 	status = dir_vnode_to_path(vnode.Get(), path, pathLength, kernel);
4850 	vnode.Unset();
4851 		// we don't need the vnode anymore
4852 	if (status != B_OK)
4853 		return status;
4854 
4855 	// append the leaf name
4856 	if (leaf) {
4857 		// insert a directory separator if this is not the file system root
4858 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4859 				>= pathLength)
4860 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4861 			return B_NAME_TOO_LONG;
4862 		}
4863 	}
4864 
4865 	return B_OK;
4866 }
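
// Editor's note: an example of the entry-ref round trip with made-up
// numbers. For device 3 and directory inode 12 containing "ReadMe", this
// might produce "/boot/home/ReadMe":
//
//	char path[B_PATH_NAME_LENGTH];
//	status_t status = vfs_entry_ref_to_path(3, 12, "ReadMe", true, path,
//		sizeof(path));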
4867 
4868 
4869 /*!	If the given descriptor locked its vnode, that lock will be released. */
4870 void
4871 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4872 {
4873 	struct vnode* vnode = fd_vnode(descriptor);
4874 
4875 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4876 		vnode->mandatory_locked_by = NULL;
4877 }
4878 
4879 
4880 /*!	Releases any POSIX locks on the file descriptor. */
4881 status_t
4882 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4883 {
4884 	struct vnode* vnode = descriptor->u.vnode;
4885 	if (vnode == NULL)
4886 		return B_OK;
4887 
4888 	if (HAS_FS_CALL(vnode, release_lock))
4889 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4890 
4891 	return release_advisory_lock(vnode, context, NULL, NULL);
4892 }
4893 
4894 
4895 /*!	Closes all file descriptors of the specified I/O context that
4896 	have the O_CLOEXEC flag set.
4897 */
4898 void
4899 vfs_exec_io_context(io_context* context)
4900 {
4901 	for (uint32 i = 0; i < context->table_size; i++) {
4902 		rw_lock_write_lock(&context->lock);
4903 
4904 		struct file_descriptor* descriptor = context->fds[i];
4905 		bool remove = false;
4906 
4907 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4908 			context->fds[i] = NULL;
4909 			context->num_used_fds--;
4910 
4911 			remove = true;
4912 		}
4913 
4914 		rw_lock_write_unlock(&context->lock);
4915 
4916 		if (remove) {
4917 			close_fd(context, descriptor);
4918 			put_fd(descriptor);
4919 		}
4920 	}
4921 }
4922 
4923 
4924 /*! Sets up a new io_context structure, and inherits the properties
4925 	of the parent io_context if one is given.
4926 */
4927 io_context*
4928 vfs_new_io_context(const io_context* parentContext, bool purgeCloseOnExec)
4929 {
4930 	io_context* context = (io_context*)malloc(sizeof(io_context));
4931 	if (context == NULL)
4932 		return NULL;
4933 
4934 	TIOC(NewIOContext(context, parentContext));
4935 
4936 	memset(context, 0, sizeof(io_context));
4937 	context->ref_count = 1;
4938 
4939 	ReadLocker parentLocker;
4940 
4941 	size_t tableSize;
4942 	if (parentContext != NULL) {
4943 		parentLocker.SetTo(parentContext->lock, false);
4944 		tableSize = parentContext->table_size;
4945 	} else
4946 		tableSize = DEFAULT_FD_TABLE_SIZE;
4947 
4948 	// allocate space for FDs and their close-on-exec flag
4949 	context->fds = (file_descriptor**)malloc(
4950 		sizeof(struct file_descriptor*) * tableSize
4951 		+ sizeof(struct select_info**) * tableSize
4952 		+ (tableSize + 7) / 8);
4953 	if (context->fds == NULL) {
4954 		free(context);
4955 		return NULL;
4956 	}
4957 
4958 	context->select_infos = (select_info**)(context->fds + tableSize);
4959 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4960 
4961 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4962 		+ sizeof(struct select_info**) * tableSize
4963 		+ (tableSize + 7) / 8);
4964 
4965 	rw_lock_init(&context->lock, "I/O context");
4966 
4967 	// Copy all parent file descriptors
4968 
4969 	if (parentContext != NULL) {
4970 		mutex_lock(&sIOContextRootLock);
4971 		context->root = parentContext->root;
4972 		if (context->root)
4973 			inc_vnode_ref_count(context->root);
4974 		mutex_unlock(&sIOContextRootLock);
4975 
4976 		context->cwd = parentContext->cwd;
4977 		if (context->cwd)
4978 			inc_vnode_ref_count(context->cwd);
4979 
4980 		if (parentContext->inherit_fds) {
4981 			for (size_t i = 0; i < tableSize; i++) {
4982 				struct file_descriptor* descriptor = parentContext->fds[i];
4983 
4984 				if (descriptor != NULL
4985 						&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4986 					const bool closeOnExec = fd_close_on_exec(parentContext, i);
4987 					if (closeOnExec && purgeCloseOnExec)
4988 						continue;
4989 
4990 					TFD(InheritFD(context, i, descriptor, parentContext));
4991 
4992 					context->fds[i] = descriptor;
4993 					context->num_used_fds++;
4994 					atomic_add(&descriptor->ref_count, 1);
4995 					atomic_add(&descriptor->open_count, 1);
4996 
4997 					if (closeOnExec)
4998 						fd_set_close_on_exec(context, i, true);
4999 				}
5000 			}
5001 		}
5002 
5003 		parentLocker.Unlock();
5004 	} else {
5005 		context->root = sRoot;
5006 		context->cwd = sRoot;
5007 
5008 		if (context->root)
5009 			inc_vnode_ref_count(context->root);
5010 
5011 		if (context->cwd)
5012 			inc_vnode_ref_count(context->cwd);
5013 	}
5014 
5015 	context->table_size = tableSize;
5016 	context->inherit_fds = parentContext != NULL;
5017 
5018 	list_init(&context->node_monitors);
5019 	context->max_monitors = DEFAULT_NODE_MONITORS;
5020 
5021 	return context;
5022 }
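
// Editor's note: the single malloc() above packs three tables back to back.
// For a hypothetical table size of 128 on a 64-bit system that is
// 128 * 8 bytes of file_descriptor pointers, 128 * 8 bytes of select_info
// pointers, and (128 + 7) / 8 = 16 bytes of close-on-exec bitmap; the
// fds, select_infos and fds_close_on_exec pointers are carved out of the
// block in exactly that order.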
5023 
5024 
5025 void
5026 vfs_get_io_context(io_context* context)
5027 {
5028 	atomic_add(&context->ref_count, 1);
5029 }
5030 
5031 
5032 void
5033 vfs_put_io_context(io_context* context)
5034 {
5035 	if (atomic_add(&context->ref_count, -1) == 1)
5036 		free_io_context(context);
5037 }
5038 
5039 
5040 status_t
5041 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5042 {
5043 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5044 		return B_BAD_VALUE;
5045 
5046 	TIOC(ResizeIOContext(context, newSize));
5047 
5048 	WriteLocker locker(context->lock);
5049 
5050 	uint32 oldSize = context->table_size;
5051 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5052 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5053 
5054 	// If the tables shrink, make sure none of the fds being dropped are in use.
5055 	if (newSize < oldSize) {
5056 		for (uint32 i = oldSize; i-- > newSize;) {
5057 			if (context->fds[i])
5058 				return B_BUSY;
5059 		}
5060 	}
5061 
5062 	// store pointers to the old tables
5063 	file_descriptor** oldFDs = context->fds;
5064 	select_info** oldSelectInfos = context->select_infos;
5065 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5066 
5067 	// allocate new tables
5068 	file_descriptor** newFDs = (file_descriptor**)malloc(
5069 		sizeof(struct file_descriptor*) * newSize
5070 		+ sizeof(struct select_info**) * newSize
5071 		+ newCloseOnExitBitmapSize);
5072 	if (newFDs == NULL)
5073 		return B_NO_MEMORY;
5074 
5075 	context->fds = newFDs;
5076 	context->select_infos = (select_info**)(context->fds + newSize);
5077 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5078 	context->table_size = newSize;
5079 
5080 	// copy entries from old tables
5081 	uint32 toCopy = min_c(oldSize, newSize);
5082 
5083 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5084 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5085 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5086 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5087 
5088 	// clear additional entries, if the tables grow
5089 	if (newSize > oldSize) {
5090 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5091 		memset(context->select_infos + oldSize, 0,
5092 			sizeof(void*) * (newSize - oldSize));
5093 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5094 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5095 	}
5096 
5097 	free(oldFDs);
5098 
5099 	return B_OK;
5100 }
5101 
5102 
5103 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5104 
5105 	Given an arbitrary vnode (identified by mount and node ID), the function
5106 	checks whether the vnode is covered by another vnode. If it is, the
5107 	function returns the mount and node ID of the covering vnode. Otherwise
5108 	it simply returns the supplied mount and node ID.
5109 
5110 	In case of error (e.g. the supplied node could not be found) the variables
5111 	for storing the resolved mount and node ID remain untouched and an error
5112 	code is returned.
5113 
5114 	\param mountID The mount ID of the vnode in question.
5115 	\param nodeID The node ID of the vnode in question.
5116 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5117 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5118 	\return
5119 	- \c B_OK, if everything went fine,
5120 	- another error code, if something went wrong.
5121 */
5122 status_t
5123 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5124 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5125 {
5126 	// get the node
5127 	struct vnode* node;
5128 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5129 	if (error != B_OK)
5130 		return error;
5131 
5132 	// resolve the node
5133 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5134 		put_vnode(node);
5135 		node = coveringNode;
5136 	}
5137 
5138 	// set the return values
5139 	*resolvedMountID = node->device;
5140 	*resolvedNodeID = node->id;
5141 
5142 	put_vnode(node);
5143 
5144 	return B_OK;
5145 }
5146 
5147 
5148 status_t
5149 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5150 	ino_t* _mountPointNodeID)
5151 {
5152 	ReadLocker nodeLocker(sVnodeLock);
5153 	ReadLocker mountLocker(sMountLock);
5154 
5155 	struct fs_mount* mount = find_mount(mountID);
5156 	if (mount == NULL)
5157 		return B_BAD_VALUE;
5158 
5159 	Vnode* mountPoint = mount->covers_vnode;
5160 
5161 	*_mountPointMountID = mountPoint->device;
5162 	*_mountPointNodeID = mountPoint->id;
5163 
5164 	return B_OK;
5165 }
5166 
5167 
5168 status_t
5169 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5170 	ino_t coveredNodeID)
5171 {
5172 	// get the vnodes
5173 	Vnode* vnode;
5174 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5175 	if (error != B_OK)
5176 		return B_BAD_VALUE;
5177 	VnodePutter vnodePutter(vnode);
5178 
5179 	Vnode* coveredVnode;
5180 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5181 		false);
5182 	if (error != B_OK)
5183 		return B_BAD_VALUE;
5184 	VnodePutter coveredVnodePutter(coveredVnode);
5185 
5186 	// establish the covered/covering links
5187 	WriteLocker locker(sVnodeLock);
5188 
5189 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5190 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5191 		return B_BUSY;
5192 	}
5193 
5194 	vnode->covers = coveredVnode;
5195 	vnode->SetCovering(true);
5196 
5197 	coveredVnode->covered_by = vnode;
5198 	coveredVnode->SetCovered(true);
5199 
5200 	// the vnodes now reference each other
5201 	inc_vnode_ref_count(vnode);
5202 	inc_vnode_ref_count(coveredVnode);
5203 
5204 	return B_OK;
5205 }
5206 
5207 
5208 int
5209 vfs_getrlimit(int resource, struct rlimit* rlp)
5210 {
5211 	if (!rlp)
5212 		return B_BAD_ADDRESS;
5213 
5214 	switch (resource) {
5215 		case RLIMIT_NOFILE:
5216 		{
5217 			struct io_context* context = get_current_io_context(false);
5218 			ReadLocker _(context->lock);
5219 
5220 			rlp->rlim_cur = context->table_size;
5221 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5222 			return 0;
5223 		}
5224 
5225 		case RLIMIT_NOVMON:
5226 		{
5227 			struct io_context* context = get_current_io_context(false);
5228 			ReadLocker _(context->lock);
5229 
5230 			rlp->rlim_cur = context->max_monitors;
5231 			rlp->rlim_max = MAX_NODE_MONITORS;
5232 			return 0;
5233 		}
5234 
5235 		default:
5236 			return B_BAD_VALUE;
5237 	}
5238 }
5239 
5240 
5241 int
5242 vfs_setrlimit(int resource, const struct rlimit* rlp)
5243 {
5244 	if (!rlp)
5245 		return B_BAD_ADDRESS;
5246 
5247 	switch (resource) {
5248 		case RLIMIT_NOFILE:
5249 			/* TODO: check getuid() */
5250 			if (rlp->rlim_max != RLIM_SAVED_MAX
5251 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5252 				return B_NOT_ALLOWED;
5253 
5254 			return vfs_resize_fd_table(get_current_io_context(false),
5255 				rlp->rlim_cur);
5256 
5257 		case RLIMIT_NOVMON:
5258 			/* TODO: check getuid() */
5259 			if (rlp->rlim_max != RLIM_SAVED_MAX
5260 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5261 				return B_NOT_ALLOWED;
5262 
5263 			return resize_monitor_table(get_current_io_context(false),
5264 				rlp->rlim_cur);
5265 
5266 		default:
5267 			return B_BAD_VALUE;
5268 	}
5269 }
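
// A sketch of the intended round trip (example_raise_fd_limit is
// hypothetical; kernel context assumed): only the soft limit may be changed,
// and the hard limit must be passed back unchanged as RLIM_SAVED_MAX or the
// fixed maximum, or vfs_setrlimit() returns B_NOT_ALLOWED.
static status_t
example_raise_fd_limit(rlim_t newSoftLimit)
{
	struct rlimit limit;
	int result = vfs_getrlimit(RLIMIT_NOFILE, &limit);
	if (result != 0)
		return result;

	limit.rlim_cur = newSoftLimit;
		// validated against MAX_FD_TABLE_SIZE by vfs_resize_fd_table()
	return vfs_setrlimit(RLIMIT_NOFILE, &limit);
}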
5270 
5271 
5272 status_t
5273 vfs_init(kernel_args* args)
5274 {
5275 	vnode::StaticInit();
5276 
5277 	sVnodeTable = new(std::nothrow) VnodeTable();
5278 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5279 		panic("vfs_init: error creating vnode hash table\n");
5280 
5281 	sMountsTable = new(std::nothrow) MountTable();
5282 	if (sMountsTable == NULL
5283 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5284 		panic("vfs_init: error creating mounts hash table\n");
5285 
5286 	sPathNameCache = create_object_cache("vfs path names",
5287 		B_PATH_NAME_LENGTH, 8, NULL, NULL, NULL);
5288 	if (sPathNameCache == NULL)
5289 		panic("vfs_init: error creating path name object_cache\n");
5290 
5291 	sVnodeCache = create_object_cache("vfs vnodes",
5292 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5293 	if (sVnodeCache == NULL)
5294 		panic("vfs_init: error creating vnode object_cache\n");
5295 
5296 	sFileDescriptorCache = create_object_cache("vfs fds",
5297 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5298 	if (sFileDescriptorCache == NULL)
5299 		panic("vfs_init: error creating file descriptor object_cache\n");
5300 
5301 	node_monitor_init();
5302 
5303 	sRoot = NULL;
5304 
5305 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5306 
5307 	if (block_cache_init() != B_OK)
5308 		return B_ERROR;
5309 
5310 #ifdef ADD_DEBUGGER_COMMANDS
5311 	// add some debugger commands
5312 	add_debugger_command_etc("vnode", &dump_vnode,
5313 		"Print info about the specified vnode",
5314 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5315 		"Prints information about the vnode specified by address <vnode> or\n"
5316 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5317 		"constructed and printed. It might not be possible to construct a\n"
5318 		"complete path, though.\n",
5319 		0);
5320 	add_debugger_command("vnodes", &dump_vnodes,
5321 		"list all vnodes (from the specified device)");
5322 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5323 		"list all vnode caches");
5324 	add_debugger_command("mount", &dump_mount,
5325 		"info about the specified fs_mount");
5326 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5327 	add_debugger_command("io_context", &dump_io_context,
5328 		"info about the I/O context");
5329 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5330 		"info about vnode usage");
5331 #endif
5332 
5333 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5334 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5335 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5336 		0);
5337 
5338 	fifo_init();
5339 	file_map_init();
5340 
5341 	return file_cache_init();
5342 }
5343 
5344 
5345 //	#pragma mark - fd_ops implementations
5346 
5347 
5348 /*!
5349 	Calls fs_open() on the given vnode and returns a new
5350 	file descriptor for it
5351 */
5352 static int
5353 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5354 {
5355 	void* cookie;
5356 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5357 	if (status != B_OK)
5358 		return status;
5359 
5360 	int fd = get_new_fd(&sFileOps, NULL, vnode, cookie, openMode, kernel);
5361 	if (fd < 0) {
5362 		FS_CALL(vnode, close, cookie);
5363 		FS_CALL(vnode, free_cookie, cookie);
5364 	}
5365 	return fd;
5366 }
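
// A caller sketch (example_open_existing is hypothetical): on success the
// vnode reference moves to the new FD, so the caller must not put it; on
// failure the reference is still the caller's to release. file_open() and
// friends below rely on the same contract via VnodePutter::Detach().
static int
example_open_existing(dev_t device, ino_t node, int openMode)
{
	struct vnode* vnode;
	status_t status = get_vnode(device, node, &vnode, true, false);
	if (status != B_OK)
		return status;

	int fd = open_vnode(vnode, openMode, true);
	if (fd < 0)
		put_vnode(vnode);
			// open or FD allocation failed; drop our reference ourselves

	return fd;
}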
5367 
5368 
5369 /*!
5370 	Looks up the entry \a name in the given directory, creating it via the
5371 	FS create() hook if necessary, and returns a new file descriptor for it
5372 */
5373 static int
5374 create_vnode(struct vnode* directory, const char* name, int openMode,
5375 	int perms, bool kernel)
5376 {
5377 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5378 	status_t status = B_ERROR;
5379 	VnodePutter vnode, dirPutter;
5380 	void* cookie;
5381 	ino_t newID;
5382 	char clonedName[B_FILE_NAME_LENGTH + 1];
5383 
5384 	// This is somewhat tricky: If the entry already exists, the FS responsible
5385 	// for the directory might not necessarily also be the one responsible for
5386 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5387 	// we can actually never call the create() hook without O_EXCL. Instead we
5388 	// try to look the entry up first. If it already exists, we just open the
5389 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5390 	// introduces a race condition, since someone else might have created the
5391 	// entry in the meantime. We hope the respective FS returns the correct
5392 	// error code, in which case we retry (up to 3 times).
5393 
5394 	for (int i = 0; i < 3 && status != B_OK; i++) {
5395 		bool create = false;
5396 
5397 		// look the node up
5398 		{
5399 			struct vnode* entry = NULL;
5400 			status = lookup_dir_entry(directory, name, &entry);
5401 			vnode.SetTo(entry);
5402 		}
5403 		if (status == B_OK) {
5404 			if ((openMode & O_EXCL) != 0)
5405 				return B_FILE_EXISTS;
5406 
5407 			// If the node is a symlink, we have to follow it, unless
5408 			// O_NOTRAVERSE is set.
5409 			if (S_ISLNK(vnode->Type()) && traverse) {
5410 				vnode.Unset();
5411 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5412 						>= B_FILE_NAME_LENGTH) {
5413 					return B_NAME_TOO_LONG;
5414 				}
5415 
5416 				inc_vnode_ref_count(directory);
5417 				dirPutter.Unset();
5418 				status = vnode_path_to_vnode(directory, clonedName, true,
5419 					kernel, vnode, NULL, clonedName);
5420 				if (status != B_OK) {
5421 					// The vnode was not found, but we may be able to create it from
5422 					// its parent. In that case, vnode_path_to_vnode() has set vnode
5423 					// to the deepest directory found along the path.
5424 					if (status == B_ENTRY_NOT_FOUND) {
5425 						directory = vnode.Detach();
5426 						dirPutter.SetTo(directory);
5427 						name = clonedName;
5428 						create = true;
5429 					} else
5430 						return status;
5431 				}
5432 			}
5433 
5434 			if (!create) {
5435 				if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5436 					return B_LINK_LIMIT;
5437 
5438 				int fd = open_vnode(vnode.Get(), openMode & ~O_CREAT, kernel);
5439 				// on success keep the vnode reference for the FD
5440 				if (fd >= 0)
5441 					vnode.Detach();
5442 
5443 				return fd;
5444 			}
5445 		}
5446 
5447 		// it doesn't exist yet -- try to create it
5448 
5449 		if (!HAS_FS_CALL(directory, create))
5450 			return B_READ_ONLY_DEVICE;
5451 
5452 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5453 			&cookie, &newID);
5454 		if (status != B_OK
5455 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5456 			return status;
5457 		}
5458 	}
5459 
5460 	if (status != B_OK)
5461 		return status;
5462 
5463 	// the node has been created successfully
5464 
5465 	rw_lock_read_lock(&sVnodeLock);
5466 	vnode.SetTo(lookup_vnode(directory->device, newID));
5467 	rw_lock_read_unlock(&sVnodeLock);
5468 
5469 	if (!vnode.IsSet()) {
5470 		panic("vfs: fs_create() returned success but there is no vnode, "
5471 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5472 		return B_BAD_VALUE;
5473 	}
5474 
5475 	int fd = get_new_fd(&sFileOps, NULL, vnode.Get(), cookie, openMode, kernel);
5476 	if (fd >= 0) {
5477 		vnode.Detach();
5478 		return fd;
5479 	}
5480 
5481 	status = fd;
5482 
5483 	// something went wrong, clean up
5484 
5485 	FS_CALL(vnode.Get(), close, cookie);
5486 	FS_CALL(vnode.Get(), free_cookie, cookie);
5487 
5488 	FS_CALL(directory, unlink, name);
5489 
5490 	return status;
5491 }
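
// The loop above, distilled: (1) look the entry up; if it exists and O_EXCL
// was not requested, just open it (following a leaf symlink first unless
// O_NOTRAVERSE/O_NOFOLLOW forbids that); (2) otherwise call create() with
// O_EXCL forced, so the FS itself arbitrates the race; (3) if that fails with
// B_FILE_EXISTS, another thread won the race, and we go back to (1). After
// three unsuccessful rounds the last error is returned.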
5492 
5493 
5494 /*! Calls fs open_dir() on the given vnode and returns a new
5495 	file descriptor for it
5496 */
5497 static int
5498 open_dir_vnode(struct vnode* vnode, bool kernel)
5499 {
5500 	if (!HAS_FS_CALL(vnode, open_dir))
5501 		return B_UNSUPPORTED;
5502 
5503 	void* cookie;
5504 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5505 	if (status != B_OK)
5506 		return status;
5507 
5508 	// the directory is opened; create an fd
5509 	status = get_new_fd(&sDirectoryOps, NULL, vnode, cookie, O_CLOEXEC, kernel);
5510 	if (status >= 0)
5511 		return status;
5512 
5513 	FS_CALL(vnode, close_dir, cookie);
5514 	FS_CALL(vnode, free_dir_cookie, cookie);
5515 
5516 	return status;
5517 }
5518 
5519 
5520 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5521 	file descriptor for it.
5522 	Used by attr_dir_open(), and attr_dir_open_fd().
5523 */
5524 static int
5525 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5526 {
5527 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5528 		return B_UNSUPPORTED;
5529 
5530 	void* cookie;
5531 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5532 	if (status != B_OK)
5533 		return status;
5534 
5535 	// the directory is opened; create an fd
5536 	status = get_new_fd(&sAttributeDirectoryOps, NULL, vnode, cookie, O_CLOEXEC,
5537 		kernel);
5538 	if (status >= 0)
5539 		return status;
5540 
5541 	FS_CALL(vnode, close_attr_dir, cookie);
5542 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5543 
5544 	return status;
5545 }
5546 
5547 
5548 static int
5549 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5550 	int openMode, int perms, bool kernel)
5551 {
5552 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5553 		"kernel %d\n", name, openMode, perms, kernel));
5554 
5555 	// get directory to put the new file in
5556 	struct vnode* directory;
5557 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5558 	if (status != B_OK)
5559 		return status;
5560 
5561 	status = create_vnode(directory, name, openMode, perms, kernel);
5562 	put_vnode(directory);
5563 
5564 	return status;
5565 }
5566 
5567 
5568 static int
5569 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5570 {
5571 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5572 		openMode, perms, kernel));
5573 
5574 	// get directory to put the new file in
5575 	char name[B_FILE_NAME_LENGTH];
5576 	VnodePutter directory;
5577 	status_t status = fd_and_path_to_dir_vnode(fd, path, directory, name,
5578 		kernel);
5579 	if (status < 0)
5580 		return status;
5581 
5582 	return create_vnode(directory.Get(), name, openMode, perms, kernel);
5583 }
5584 
5585 
5586 static int
5587 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5588 	int openMode, bool kernel)
5589 {
5590 	if (name == NULL || *name == '\0')
5591 		return B_BAD_VALUE;
5592 
5593 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5594 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5595 
5596 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5597 
5598 	// get the vnode matching the entry_ref
5599 	VnodePutter vnode;
5600 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5601 		kernel, vnode);
5602 	if (status != B_OK)
5603 		return status;
5604 
5605 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5606 		return B_LINK_LIMIT;
5607 
5608 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5609 	if (newFD >= 0) {
5610 		cache_node_opened(vnode.Get(), vnode->cache, mountID,
5611 			directoryID, vnode->id, name);
5612 
5613 		// The vnode reference has been transferred to the FD
5614 		vnode.Detach();
5615 	}
5616 
5617 	return newFD;
5618 }
5619 
5620 
5621 static int
5622 file_open(int fd, char* path, int openMode, bool kernel)
5623 {
5624 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5625 
5626 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5627 		fd, path, openMode, kernel));
5628 
5629 	// get the vnode matching the vnode + path combination
5630 	VnodePutter vnode;
5631 	ino_t parentID;
5632 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode,
5633 		&parentID, kernel);
5634 	if (status != B_OK)
5635 		return status;
5636 
5637 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5638 		return B_LINK_LIMIT;
5639 
5640 	// open the vnode
5641 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5642 	if (newFD >= 0) {
5643 		cache_node_opened(vnode.Get(), vnode->cache,
5644 			vnode->device, parentID, vnode->id, NULL);
5645 
5646 		// The vnode reference has been transferred to the FD
5647 		vnode.Detach();
5648 	}
5649 
5650 	return newFD;
5651 }
5652 
5653 
5654 static status_t
5655 file_close(struct file_descriptor* descriptor)
5656 {
5657 	struct vnode* vnode = descriptor->u.vnode;
5658 	status_t status = B_OK;
5659 
5660 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5661 
5662 	cache_node_closed(vnode, vnode->cache, vnode->device,
5663 		vnode->id);
5664 	if (HAS_FS_CALL(vnode, close)) {
5665 		status = FS_CALL(vnode, close, descriptor->cookie);
5666 	}
5667 
5668 	if (status == B_OK) {
5669 		// remove all outstanding locks for this team
5670 		if (HAS_FS_CALL(vnode, release_lock))
5671 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5672 		else
5673 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5674 	}
5675 	return status;
5676 }
5677 
5678 
5679 static void
5680 file_free_fd(struct file_descriptor* descriptor)
5681 {
5682 	struct vnode* vnode = descriptor->u.vnode;
5683 
5684 	if (vnode != NULL) {
5685 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5686 		put_vnode(vnode);
5687 	}
5688 }
5689 
5690 
5691 static status_t
5692 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5693 	size_t* length)
5694 {
5695 	struct vnode* vnode = descriptor->u.vnode;
5696 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5697 		pos, length, *length));
5698 
5699 	if (S_ISDIR(vnode->Type()))
5700 		return B_IS_A_DIRECTORY;
5701 	if (pos != -1 && descriptor->pos == -1)
5702 		return ESPIPE;
5703 
5704 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5705 }
5706 
5707 
5708 static status_t
5709 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5710 	size_t* length)
5711 {
5712 	struct vnode* vnode = descriptor->u.vnode;
5713 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5714 		length));
5715 
5716 	if (S_ISDIR(vnode->Type()))
5717 		return B_IS_A_DIRECTORY;
5718 	if (pos != -1 && descriptor->pos == -1)
5719 		return ESPIPE;
5720 
5721 	if (!HAS_FS_CALL(vnode, write))
5722 		return B_READ_ONLY_DEVICE;
5723 
5724 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5725 }
5726 
5727 
5728 static ssize_t
5729 file_vector_io(struct file_descriptor* descriptor, off_t pos,
5730 	const struct iovec *vecs, int count, bool write)
5731 {
5732 	struct vnode* vnode = descriptor->u.vnode;
5733 	if (pos != -1 && descriptor->pos == -1)
5734 		return ESPIPE;
5735 	if (S_ISDIR(vnode->Type()))
5736 		return B_IS_A_DIRECTORY;
5737 
5738 	if (pos == -1)
5739 		return B_UNSUPPORTED;
5740 	if (!HAS_FS_CALL(vnode, io))
5741 		return B_UNSUPPORTED;
5742 
5743 	// We can only perform real vectored I/O for vnodes that have no cache,
5744 	// because the I/O hook bypasses the cache entirely.
5745 	if (vnode->cache != NULL)
5746 		return B_UNSUPPORTED;
5747 
5748 	BStackOrHeapArray<generic_io_vec, 8> iovecs(count);
5749 	if (!iovecs.IsValid())
5750 		return B_NO_MEMORY;
5751 
5752 	generic_size_t length = 0;
5753 	for (int i = 0; i < count; i++) {
5754 		iovecs[i].base = (generic_addr_t)vecs[i].iov_base;
5755 		iovecs[i].length = vecs[i].iov_len;
5756 		length += vecs[i].iov_len;
5757 	}
5758 
5759 	status_t status = (write ? vfs_write_pages : vfs_read_pages)(vnode,
5760 		descriptor->cookie, pos, iovecs, count, 0, &length);
5761 	if (length > 0)
5762 		return length;
5763 	return status;
5764 }
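
// Note on the B_UNSUPPORTED early-outs above: they are deliberate, so that
// the caller can fall back to emulating the vectored request with plain
// read/write calls. The io() hook may only be used for vnodes without a
// cache, since it bypasses the file cache entirely and could otherwise
// return stale data or miss pending writes.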
5765 
5766 
5767 static ssize_t
5768 file_readv(struct file_descriptor* descriptor, off_t pos,
5769 	const struct iovec *vecs, int count)
5770 {
5771 	FUNCTION(("file_readv: pos %" B_PRIdOFF "\n", pos));
5772 	return file_vector_io(descriptor, pos, vecs, count, false);
5773 }
5774 
5775 
5776 static ssize_t
5777 file_writev(struct file_descriptor* descriptor, off_t pos,
5778 	const struct iovec *vecs, int count)
5779 {
5780 	FUNCTION(("file_writev: pos %" B_PRIdOFF "\n", pos));
5781 	return file_vector_io(descriptor, pos, vecs, count, true);
5782 }
5783 
5784 
5785 static off_t
5786 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5787 {
5788 	struct vnode* vnode = descriptor->u.vnode;
5789 	off_t offset;
5790 	bool isDevice = false;
5791 
5792 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5793 		seekType));
5794 
5795 	if (descriptor->pos == -1)
5796 		return ESPIPE;
5797 
5798 	switch (vnode->Type() & S_IFMT) {
5799 		// drivers publish block devices as character devices, so accept both
5800 		case S_IFBLK:
5801 		case S_IFCHR:
5802 			isDevice = true;
5803 			break;
5804 	}
5805 
5806 	switch (seekType) {
5807 		case SEEK_SET:
5808 			offset = 0;
5809 			break;
5810 		case SEEK_CUR:
5811 			offset = descriptor->pos;
5812 			break;
5813 		case SEEK_END:
5814 		{
5815 			// stat() the node
5816 			if (!HAS_FS_CALL(vnode, read_stat))
5817 				return B_UNSUPPORTED;
5818 
5819 			struct stat stat;
5820 			status_t status = FS_CALL(vnode, read_stat, &stat);
5821 			if (status != B_OK)
5822 				return status;
5823 
5824 			offset = stat.st_size;
5825 
5826 			if (offset == 0 && isDevice) {
5827 				// stat() on regular drivers doesn't report size
5828 				device_geometry geometry;
5829 
5830 				if (HAS_FS_CALL(vnode, ioctl)) {
5831 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5832 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5833 					if (status == B_OK)
5834 						offset = (off_t)geometry.bytes_per_sector
5835 							* geometry.sectors_per_track
5836 							* geometry.cylinder_count
5837 							* geometry.head_count;
5838 				}
5839 			}
5840 
5841 			break;
5842 		}
5843 		case SEEK_DATA:
5844 		case SEEK_HOLE:
5845 		{
5846 			status_t status = B_BAD_VALUE;
5847 			if (HAS_FS_CALL(vnode, ioctl)) {
5848 				offset = pos;
5849 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5850 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5851 					&offset, sizeof(offset));
5852 				if (status == B_OK) {
5853 					if (offset > pos)
5854 						offset -= pos;
5855 					break;
5856 				}
5857 			}
5858 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5859 				return status;
5860 
5861 			// basic implementation: stat() the node
5862 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5863 				return B_BAD_VALUE;
5864 
5865 			struct stat stat;
5866 			status = FS_CALL(vnode, read_stat, &stat);
5867 			if (status != B_OK)
5868 				return status;
5869 
5870 			off_t end = stat.st_size;
5871 			if (pos >= end)
5872 				return ENXIO;
5873 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5874 			break;
5875 		}
5876 		default:
5877 			return B_BAD_VALUE;
5878 	}
5879 
5880 	// assumes off_t is 64 bits wide
5881 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5882 		return B_BUFFER_OVERFLOW;
5883 
5884 	pos += offset;
5885 	if (pos < 0)
5886 		return B_BAD_VALUE;
5887 
5888 	return descriptor->pos = pos;
5889 }
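
// The stat()-based fallback above implements the minimal view of a file
// without FIOSEEKDATA/FIOSEEKHOLE support: one contiguous data region with an
// implicit hole at EOF. SEEK_DATA therefore keeps the position (offset 0),
// SEEK_HOLE lands on st_size (offset = end - pos, added back onto pos below),
// and a position at or beyond EOF yields ENXIO.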
5890 
5891 
5892 static status_t
5893 file_select(struct file_descriptor* descriptor, uint8 event,
5894 	struct selectsync* sync)
5895 {
5896 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5897 
5898 	struct vnode* vnode = descriptor->u.vnode;
5899 
5900 	// If the FS has no select() hook, notify select() now.
5901 	if (!HAS_FS_CALL(vnode, select)) {
5902 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5903 			notify_select_event(sync, event);
5904 		return B_UNSUPPORTED;
5905 	}
5906 
5907 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5908 }
5909 
5910 
5911 static status_t
5912 file_deselect(struct file_descriptor* descriptor, uint8 event,
5913 	struct selectsync* sync)
5914 {
5915 	struct vnode* vnode = descriptor->u.vnode;
5916 
5917 	if (!HAS_FS_CALL(vnode, deselect))
5918 		return B_OK;
5919 
5920 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5921 }
5922 
5923 
5924 static status_t
5925 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5926 	bool kernel)
5927 {
5928 	struct vnode* vnode;
5929 	status_t status;
5930 
5931 	if (name == NULL || *name == '\0')
5932 		return B_BAD_VALUE;
5933 
5934 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5935 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5936 
5937 	status = get_vnode(mountID, parentID, &vnode, true, false);
5938 	if (status != B_OK)
5939 		return status;
5940 
5941 	if (HAS_FS_CALL(vnode, create_dir))
5942 		status = FS_CALL(vnode, create_dir, name, perms);
5943 	else
5944 		status = B_READ_ONLY_DEVICE;
5945 
5946 	put_vnode(vnode);
5947 	return status;
5948 }
5949 
5950 
5951 static status_t
5952 dir_create(int fd, char* path, int perms, bool kernel)
5953 {
5954 	char filename[B_FILE_NAME_LENGTH];
5955 	status_t status;
5956 
5957 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5958 		kernel));
5959 
5960 	VnodePutter vnode;
5961 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
5962 	if (status < 0)
5963 		return status;
5964 
5965 	if (HAS_FS_CALL(vnode, create_dir)) {
5966 		status = FS_CALL(vnode.Get(), create_dir, filename, perms);
5967 	} else
5968 		status = B_READ_ONLY_DEVICE;
5969 
5970 	return status;
5971 }
5972 
5973 
5974 static int
5975 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5976 {
5977 	FUNCTION(("dir_open_entry_ref()\n"));
5978 
5979 	if (name && name[0] == '\0')
5980 		return B_BAD_VALUE;
5981 
5982 	// get the vnode matching the entry_ref/node_ref
5983 	VnodePutter vnode;
5984 	status_t status;
5985 	if (name) {
5986 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5987 			vnode);
5988 	} else {
5989 		struct vnode* temp = NULL;
5990 		status = get_vnode(mountID, parentID, &temp, true, false);
5991 		vnode.SetTo(temp);
5992 	}
5993 	if (status != B_OK)
5994 		return status;
5995 
5996 	int newFD = open_dir_vnode(vnode.Get(), kernel);
5997 	if (newFD >= 0) {
5998 		cache_node_opened(vnode.Get(), vnode->cache, mountID, parentID,
5999 			vnode->id, name);
6000 
6001 		// The vnode reference has been transferred to the FD
6002 		vnode.Detach();
6003 	}
6004 
6005 	return newFD;
6006 }
6007 
6008 
6009 static int
6010 dir_open(int fd, char* path, bool kernel)
6011 {
6012 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6013 		kernel));
6014 
6015 	// get the vnode matching the vnode + path combination
6016 	VnodePutter vnode;
6017 	ino_t parentID;
6018 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, &parentID,
6019 		kernel);
6020 	if (status != B_OK)
6021 		return status;
6022 
6023 	// open the dir
6024 	int newFD = open_dir_vnode(vnode.Get(), kernel);
6025 	if (newFD >= 0) {
6026 		cache_node_opened(vnode.Get(), vnode->cache, vnode->device,
6027 			parentID, vnode->id, NULL);
6028 
6029 		// The vnode reference has been transferred to the FD
6030 		vnode.Detach();
6031 	}
6032 
6033 	return newFD;
6034 }
6035 
6036 
6037 static status_t
6038 dir_close(struct file_descriptor* descriptor)
6039 {
6040 	struct vnode* vnode = descriptor->u.vnode;
6041 
6042 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6043 
6044 	cache_node_closed(vnode, vnode->cache, vnode->device,
6045 		vnode->id);
6046 	if (HAS_FS_CALL(vnode, close_dir))
6047 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6048 
6049 	return B_OK;
6050 }
6051 
6052 
6053 static void
6054 dir_free_fd(struct file_descriptor* descriptor)
6055 {
6056 	struct vnode* vnode = descriptor->u.vnode;
6057 
6058 	if (vnode != NULL) {
6059 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6060 		put_vnode(vnode);
6061 	}
6062 }
6063 
6064 
6065 static status_t
6066 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6067 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6068 {
6069 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6070 		bufferSize, _count);
6071 }
6072 
6073 
6074 static status_t
6075 fix_dirent(struct vnode* parent, struct dirent* entry,
6076 	struct io_context* ioContext)
6077 {
6078 	// set d_pdev and d_pino
6079 	entry->d_pdev = parent->device;
6080 	entry->d_pino = parent->id;
6081 
6082 	// If this is the ".." entry and the directory is covering another vnode,
6083 	// we need to replace d_dev and d_ino with the actual values.
6084 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6085 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6086 			ioContext);
6087 	}
6088 
6089 	// resolve covered vnodes
6090 	ReadLocker _(&sVnodeLock);
6091 
6092 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6093 	if (vnode != NULL && vnode->covered_by != NULL) {
6094 		do {
6095 			vnode = vnode->covered_by;
6096 		} while (vnode->covered_by != NULL);
6097 
6098 		entry->d_dev = vnode->device;
6099 		entry->d_ino = vnode->id;
6100 	}
6101 
6102 	return B_OK;
6103 }
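
// The walk above follows covered_by links up to the topmost covering vnode,
// so an entry that is a mount point reports the device/node IDs of what is
// actually visible there rather than those of the covered node.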
6104 
6105 
6106 static status_t
6107 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6108 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6109 {
6110 	if (!HAS_FS_CALL(vnode, read_dir))
6111 		return B_UNSUPPORTED;
6112 
6113 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6114 		_count);
6115 	if (error != B_OK)
6116 		return error;
6117 
6118 	// we need to adjust the read dirents
6119 	uint32 count = *_count;
6120 	for (uint32 i = 0; i < count; i++) {
6121 		error = fix_dirent(vnode, buffer, ioContext);
6122 		if (error != B_OK)
6123 			return error;
6124 
6125 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6126 	}
6127 
6128 	return error;
6129 }
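
// A minimal caller sketch (example_read_one_entry is hypothetical; the cookie
// must come from the FS open_dir() hook). Entries are returned back to back,
// so a buffer is walked with the d_reclen stride, just as the fix_dirent()
// loop above does.
static status_t
example_read_one_entry(struct io_context* ioContext, struct vnode* vnode,
	void* cookie)
{
	union {
		struct dirent entry;
		char bytes[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
	} buffer;
	uint32 count = 1;

	status_t status = dir_read(ioContext, vnode, cookie, &buffer.entry,
		sizeof(buffer), &count);
	if (status == B_OK && count > 0)
		dprintf("entry: %s\n", buffer.entry.d_name);
	return status;
}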
6130 
6131 
6132 static status_t
6133 dir_rewind(struct file_descriptor* descriptor)
6134 {
6135 	struct vnode* vnode = descriptor->u.vnode;
6136 
6137 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6138 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6139 	}
6140 
6141 	return B_UNSUPPORTED;
6142 }
6143 
6144 
6145 static status_t
6146 dir_remove(int fd, char* path, bool kernel)
6147 {
6148 	char name[B_FILE_NAME_LENGTH];
6149 	status_t status;
6150 
6151 	if (path != NULL) {
6152 		// we need to make sure our path name doesn't end with "/", ".",
6153 		// or ".."
6154 		char* lastSlash;
6155 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6156 			char* leaf = lastSlash + 1;
6157 			if (!strcmp(leaf, ".."))
6158 				return B_NOT_ALLOWED;
6159 
6160 			// omit multiple slashes
6161 			while (lastSlash > path && lastSlash[-1] == '/')
6162 				lastSlash--;
6163 
6164 			if (leaf[0]
6165 				&& strcmp(leaf, ".")) {
6166 				break;
6167 			}
6168 			// "name/" -> "name", or "name/." -> "name"
6169 			lastSlash[0] = '\0';
6170 		}
6171 
6172 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6173 			return B_NOT_ALLOWED;
6174 	}
6175 
6176 	VnodePutter directory;
6177 	status = fd_and_path_to_dir_vnode(fd, path, directory, name, kernel);
6178 	if (status != B_OK)
6179 		return status;
6180 
6181 	if (HAS_FS_CALL(directory, remove_dir))
6182 		status = FS_CALL(directory.Get(), remove_dir, name);
6183 	else
6184 		status = B_READ_ONLY_DEVICE;
6185 
6186 	return status;
6187 }
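
// Examples of the normalization above (a summary, not additional behavior):
//	"foo/bar/"   -> "foo/bar"
//	"foo/bar/."  -> "foo/bar"
//	"foo/bar//." -> "foo/bar"
//	"foo/bar/.." -> B_NOT_ALLOWED
//	"." or ".."  -> B_NOT_ALLOWED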
6188 
6189 
6190 static status_t
6191 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6192 	size_t length)
6193 {
6194 	struct vnode* vnode = descriptor->u.vnode;
6195 
6196 	if (HAS_FS_CALL(vnode, ioctl))
6197 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6198 
6199 	return B_DEV_INVALID_IOCTL;
6200 }
6201 
6202 
6203 static status_t
6204 common_fcntl(int fd, int op, size_t argument, bool kernel)
6205 {
6206 	struct flock flock;
6207 
6208 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6209 		fd, op, argument, kernel ? "kernel" : "user"));
6210 
6211 	struct io_context* context = get_current_io_context(kernel);
6212 
6213 	FileDescriptorPutter descriptor(get_fd(context, fd));
6214 	if (!descriptor.IsSet())
6215 		return B_FILE_ERROR;
6216 
6217 	struct vnode* vnode = fd_vnode(descriptor.Get());
6218 
6219 	status_t status = B_OK;
6220 
6221 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6222 		if (descriptor->ops != &sFileOps)
6223 			status = B_BAD_VALUE;
6224 		else if (kernel)
6225 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6226 		else if (user_memcpy(&flock, (struct flock*)argument,
6227 				sizeof(struct flock)) != B_OK)
6228 			status = B_BAD_ADDRESS;
6229 		if (status != B_OK)
6230 			return status;
6231 	}
6232 
6233 	switch (op) {
6234 		case F_SETFD:
6235 		{
6236 			// Set file descriptor flags
6237 
6238 			// O_CLOEXEC is the only flag available at this time
6239 			rw_lock_write_lock(&context->lock);
6240 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6241 			rw_lock_write_unlock(&context->lock);
6242 
6243 			status = B_OK;
6244 			break;
6245 		}
6246 
6247 		case F_GETFD:
6248 		{
6249 			// Get file descriptor flags
6250 			rw_lock_read_lock(&context->lock);
6251 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6252 			rw_lock_read_unlock(&context->lock);
6253 			break;
6254 		}
6255 
6256 		case F_SETFL:
6257 		{
6258 			// Set file descriptor open mode
6259 
6260 			// we only accept changes to certain flags
6261 			const int32 modifiableFlags = O_APPEND | O_NONBLOCK;
6262 			argument &= modifiableFlags;
6263 
6264 			if (descriptor->ops->fd_set_flags != NULL) {
6265 				status = descriptor->ops->fd_set_flags(descriptor.Get(), argument);
6266 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6267 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6268 					(int)argument);
6269 			} else
6270 				status = B_UNSUPPORTED;
6271 
6272 			if (status == B_OK) {
6273 				// update this descriptor's open_mode field
6274 				descriptor->open_mode = (descriptor->open_mode
6275 					& ~modifiableFlags) | argument;
6276 			}
6277 
6278 			break;
6279 		}
6280 
6281 		case F_GETFL:
6282 			// Get file descriptor open mode
6283 			status = descriptor->open_mode;
6284 			break;
6285 
6286 		case F_DUPFD:
6287 		case F_DUPFD_CLOEXEC:
6288 		{
6289 			status = new_fd_etc(context, descriptor.Get(), (int)argument);
6290 			if (status >= 0) {
6291 				rw_lock_write_lock(&context->lock);
6292 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6293 				rw_lock_write_unlock(&context->lock);
6294 
6295 				atomic_add(&descriptor->ref_count, 1);
6296 			}
6297 			break;
6298 		}
6299 
6300 		case F_GETLK:
6301 			if (vnode != NULL) {
6302 				struct flock normalizedLock;
6303 
6304 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6305 				status = normalize_flock(descriptor.Get(), &normalizedLock);
6306 				if (status != B_OK)
6307 					break;
6308 
6309 				if (HAS_FS_CALL(vnode, test_lock)) {
6310 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6311 						&normalizedLock);
6312 				} else
6313 					status = test_advisory_lock(vnode, &normalizedLock);
6314 				if (status == B_OK) {
6315 					if (normalizedLock.l_type == F_UNLCK) {
6316 						// no conflicting lock found, copy back the same struct
6317 						// we were given except change type to F_UNLCK
6318 						flock.l_type = F_UNLCK;
6319 						if (kernel) {
6320 							memcpy((struct flock*)argument, &flock,
6321 								sizeof(struct flock));
6322 						} else {
6323 							status = user_memcpy((struct flock*)argument,
6324 								&flock, sizeof(struct flock));
6325 						}
6326 					} else {
6327 						// a conflicting lock was found, copy back its range and
6328 						// type
6329 						if (normalizedLock.l_len == OFF_MAX)
6330 							normalizedLock.l_len = 0;
6331 
6332 						if (kernel) {
6333 							memcpy((struct flock*)argument,
6334 								&normalizedLock, sizeof(struct flock));
6335 						} else {
6336 							status = user_memcpy((struct flock*)argument,
6337 								&normalizedLock, sizeof(struct flock));
6338 						}
6339 					}
6340 				}
6341 			} else
6342 				status = B_BAD_VALUE;
6343 			break;
6344 
6345 		case F_SETLK:
6346 		case F_SETLKW:
6347 			status = normalize_flock(descriptor.Get(), &flock);
6348 			if (status != B_OK)
6349 				break;
6350 
6351 			if (vnode == NULL) {
6352 				status = B_BAD_VALUE;
6353 			} else if (flock.l_type == F_UNLCK) {
6354 				if (HAS_FS_CALL(vnode, release_lock)) {
6355 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6356 						&flock);
6357 				} else {
6358 					status = release_advisory_lock(vnode, context, NULL,
6359 						&flock);
6360 				}
6361 			} else {
6362 				// the open mode must match the lock type
6363 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6364 						&& flock.l_type == F_WRLCK)
6365 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6366 						&& flock.l_type == F_RDLCK))
6367 					status = B_FILE_ERROR;
6368 				else {
6369 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6370 						status = FS_CALL(vnode, acquire_lock,
6371 							descriptor->cookie, &flock, op == F_SETLKW);
6372 					} else {
6373 						status = acquire_advisory_lock(vnode, context, NULL,
6374 							&flock, op == F_SETLKW);
6375 					}
6376 				}
6377 			}
6378 			break;
6379 
6380 		// TODO: add support for more ops?
6381 
6382 		default:
6383 			status = B_BAD_VALUE;
6384 	}
6385 
6386 	return status;
6387 }
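
// A kernel-mode sketch of the F_SETLK path above (example_lock_whole_file is
// hypothetical; the FD must belong to sFileOps, i.e. be a regular file).
// l_len == 0 means "to the end of the file" and is normalized to OFF_MAX
// internally, the inverse of the F_GETLK mapping back to 0 above.
static status_t
example_lock_whole_file(int fd)
{
	struct flock flock;
	memset(&flock, 0, sizeof(flock));
	flock.l_type = F_WRLCK;
	flock.l_whence = SEEK_SET;
		// lock from offset 0 through the end of the file (l_len == 0)

	// non-blocking; F_SETLKW would wait for a conflicting lock instead
	return common_fcntl(fd, F_SETLK, (size_t)&flock, true);
}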
6388 
6389 
6390 static status_t
6391 common_sync(int fd, bool kernel)
6392 {
6393 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6394 
6395 	struct vnode* vnode;
6396 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6397 	if (!descriptor.IsSet())
6398 		return B_FILE_ERROR;
6399 
6400 	status_t status;
6401 	if (HAS_FS_CALL(vnode, fsync))
6402 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6403 	else
6404 		status = B_UNSUPPORTED;
6405 
6406 	return status;
6407 }
6408 
6409 
6410 static status_t
6411 common_lock_node(int fd, bool kernel)
6412 {
6413 	struct vnode* vnode;
6414 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6415 	if (!descriptor.IsSet())
6416 		return B_FILE_ERROR;
6417 
6418 	status_t status = B_OK;
6419 
6420 	// We need to set the locking atomically - someone
6421 	// else might set one at the same time
6422 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6423 			descriptor.Get(), (file_descriptor*)NULL) != NULL)
6424 		status = B_BUSY;
6425 
6426 	return status;
6427 }
6428 
6429 
6430 static status_t
6431 common_unlock_node(int fd, bool kernel)
6432 {
6433 	struct vnode* vnode;
6434 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6435 	if (!descriptor.IsSet())
6436 		return B_FILE_ERROR;
6437 
6438 	status_t status = B_OK;
6439 
6440 	// We need to clear the locking atomically - someone
6441 	// else might change it at the same time
6442 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6443 			(file_descriptor*)NULL, descriptor.Get()) != descriptor.Get())
6444 		status = B_BAD_VALUE;
6445 
6446 	return status;
6447 }
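
// Both helpers above hinge on a single compare-and-swap of
// vnode->mandatory_locked_by: locking succeeds only on the transition
// NULL -> descriptor, unlocking only on descriptor -> NULL. Observing any
// other previous value means another descriptor holds the node lock (or
// raced us for it), so the call fails immediately instead of blocking.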
6448 
6449 
6450 static status_t
6451 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6452 {
6453 	if (offset < 0 || length == 0)
6454 		return B_BAD_VALUE;
6455 	if (offset > OFF_MAX - length)
6456 		return B_FILE_TOO_LARGE;
6457 
6458 	struct vnode* vnode;
6459 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6460 	if (!descriptor.IsSet() || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6461 		return B_FILE_ERROR;
6462 
6463 	switch (vnode->Type() & S_IFMT) {
6464 		case S_IFIFO:
6465 		case S_IFSOCK:
6466 			return ESPIPE;
6467 
6468 		case S_IFBLK:
6469 		case S_IFCHR:
6470 		case S_IFDIR:
6471 		case S_IFLNK:
6472 			return B_DEVICE_NOT_FOUND;
6473 
6474 		case S_IFREG:
6475 			break;
6476 	}
6477 
6478 	status_t status = B_OK;
6479 	if (HAS_FS_CALL(vnode, preallocate)) {
6480 		status = FS_CALL(vnode, preallocate, offset, length);
6481 	} else {
6482 		status = HAS_FS_CALL(vnode, write)
6483 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6484 	}
6485 
6486 	return status;
6487 }
6488 
6489 
6490 static status_t
6491 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6492 	bool kernel)
6493 {
6494 	VnodePutter vnode;
6495 	status_t status;
6496 
6497 	status = fd_and_path_to_vnode(fd, path, false, vnode, NULL, kernel);
6498 	if (status != B_OK)
6499 		return status;
6500 
6501 	if (HAS_FS_CALL(vnode, read_symlink)) {
6502 		status = FS_CALL(vnode.Get(), read_symlink, buffer, _bufferSize);
6503 	} else
6504 		status = B_BAD_VALUE;
6505 
6506 	return status;
6507 }
6508 
6509 
6510 static status_t
6511 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6512 	bool kernel)
6513 {
6514 	// path validity checks have to be in the calling function!
6515 	char name[B_FILE_NAME_LENGTH];
6516 	status_t status;
6517 
6518 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6519 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6520 
6521 	VnodePutter vnode;
6522 	status = fd_and_path_to_dir_vnode(fd, path, vnode, name, kernel);
6523 	if (status != B_OK)
6524 		return status;
6525 
6526 	if (HAS_FS_CALL(vnode, create_symlink))
6527 		status = FS_CALL(vnode.Get(), create_symlink, name, toPath, mode);
6528 	else {
6529 		status = HAS_FS_CALL(vnode, write)
6530 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6531 	}
6532 
6533 	return status;
6534 }
6535 
6536 
6537 static status_t
6538 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6539 	bool traverseLeafLink, bool kernel)
6540 {
6541 	// path validity checks have to be in the calling function!
6542 
6543 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6544 		toPath, kernel));
6545 
6546 	char name[B_FILE_NAME_LENGTH];
6547 	VnodePutter directory;
6548 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, directory, name,
6549 		kernel);
6550 	if (status != B_OK)
6551 		return status;
6552 
6553 	VnodePutter vnode;
6554 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, vnode, NULL,
6555 		kernel);
6556 	if (status != B_OK)
6557 		return status;
6558 
6559 	if (directory->mount != vnode->mount)
6560 		return B_CROSS_DEVICE_LINK;
6561 
6562 	if (HAS_FS_CALL(directory, link))
6563 		status = FS_CALL(directory.Get(), link, name, vnode.Get());
6564 	else
6565 		status = B_READ_ONLY_DEVICE;
6566 
6567 	return status;
6568 }
6569 
6570 
6571 static status_t
6572 common_unlink(int fd, char* path, bool kernel)
6573 {
6574 	char filename[B_FILE_NAME_LENGTH];
6575 	status_t status;
6576 
6577 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6578 		kernel));
6579 
6580 	VnodePutter vnode;
6581 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
6582 	if (status < 0)
6583 		return status;
6584 
6585 	if (HAS_FS_CALL(vnode, unlink))
6586 		status = FS_CALL(vnode.Get(), unlink, filename);
6587 	else
6588 		status = B_READ_ONLY_DEVICE;
6589 
6590 	return status;
6591 }
6592 
6593 
6594 static status_t
6595 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6596 {
6597 	status_t status;
6598 
6599 	// TODO: honor effectiveUserGroup argument
6600 
6601 	VnodePutter vnode;
6602 	status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
6603 	if (status != B_OK)
6604 		return status;
6605 
6606 	if (HAS_FS_CALL(vnode, access))
6607 		status = FS_CALL(vnode.Get(), access, mode);
6608 	else
6609 		status = B_OK;
6610 
6611 	return status;
6612 }
6613 
6614 
6615 static status_t
6616 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6617 {
6618 	status_t status;
6619 
6620 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6621 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6622 
6623 	VnodePutter fromVnode;
6624 	char fromName[B_FILE_NAME_LENGTH];
6625 	status = fd_and_path_to_dir_vnode(fd, path, fromVnode, fromName, kernel);
6626 	if (status != B_OK)
6627 		return status;
6628 
6629 	VnodePutter toVnode;
6630 	char toName[B_FILE_NAME_LENGTH];
6631 	status = fd_and_path_to_dir_vnode(newFD, newPath, toVnode, toName, kernel);
6632 	if (status != B_OK)
6633 		return status;
6634 
6635 	if (fromVnode->device != toVnode->device)
6636 		return B_CROSS_DEVICE_LINK;
6637 
6638 	if (fromVnode.Get() == toVnode.Get() && !strcmp(fromName, toName))
6639 		return B_OK;
6640 
6641 	if (fromName[0] == '\0' || toName[0] == '\0'
6642 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6643 		|| !strcmp(toName, ".") || !strcmp(toName, "..")) {
6644 		return B_BAD_VALUE;
6645 	}
6646 
6647 	if (HAS_FS_CALL(fromVnode, rename))
6648 		status = FS_CALL(fromVnode.Get(), rename, fromName, toVnode.Get(), toName);
6649 	else
6650 		status = B_READ_ONLY_DEVICE;
6651 
6652 	return status;
6653 }
6654 
6655 
6656 static status_t
6657 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6658 {
6659 	struct vnode* vnode = descriptor->u.vnode;
6660 
6661 	FUNCTION(("common_read_stat: stat %p\n", stat));
6662 
6663 	// TODO: remove this once all file systems properly set them!
6664 	stat->st_crtim.tv_nsec = 0;
6665 	stat->st_ctim.tv_nsec = 0;
6666 	stat->st_mtim.tv_nsec = 0;
6667 	stat->st_atim.tv_nsec = 0;
6668 
6669 	return vfs_stat_vnode(vnode, stat);
6670 }
6671 
6672 
6673 static status_t
6674 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6675 	int statMask)
6676 {
6677 	struct vnode* vnode = descriptor->u.vnode;
6678 
6679 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6680 		vnode, stat, statMask));
6681 
6682 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6683 		&& (statMask & B_STAT_SIZE) != 0) {
6684 		return B_BAD_VALUE;
6685 	}
6686 
6687 	if (!HAS_FS_CALL(vnode, write_stat))
6688 		return B_READ_ONLY_DEVICE;
6689 
6690 	return FS_CALL(vnode, write_stat, stat, statMask);
6691 }
6692 
6693 
6694 static status_t
6695 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6696 	struct stat* stat, bool kernel)
6697 {
6698 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6699 		stat));
6700 
6701 	VnodePutter vnode;
6702 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6703 		NULL, kernel);
6704 	if (status != B_OK)
6705 		return status;
6706 
6707 	status = vfs_stat_vnode(vnode.Get(), stat);
6708 
6709 	return status;
6710 }
6711 
6712 
6713 static status_t
6714 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6715 	const struct stat* stat, int statMask, bool kernel)
6716 {
6717 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6718 		"kernel %d\n", fd, path, stat, statMask, kernel));
6719 
6720 	VnodePutter vnode;
6721 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6722 		NULL, kernel);
6723 	if (status != B_OK)
6724 		return status;
6725 
6726 	if (HAS_FS_CALL(vnode, write_stat))
6727 		status = FS_CALL(vnode.Get(), write_stat, stat, statMask);
6728 	else
6729 		status = B_READ_ONLY_DEVICE;
6730 
6731 	return status;
6732 }
6733 
6734 
6735 static int
6736 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6737 {
6738 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6739 		kernel));
6740 
6741 	VnodePutter vnode;
6742 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6743 		NULL, kernel);
6744 	if (status != B_OK)
6745 		return status;
6746 
6747 	status = open_attr_dir_vnode(vnode.Get(), kernel);
6748 	if (status >= 0)
6749 		vnode.Detach();
6750 
6751 	return status;
6752 }
6753 
6754 
6755 static status_t
6756 attr_dir_close(struct file_descriptor* descriptor)
6757 {
6758 	struct vnode* vnode = descriptor->u.vnode;
6759 
6760 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6761 
6762 	if (HAS_FS_CALL(vnode, close_attr_dir))
6763 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6764 
6765 	return B_OK;
6766 }
6767 
6768 
6769 static void
6770 attr_dir_free_fd(struct file_descriptor* descriptor)
6771 {
6772 	struct vnode* vnode = descriptor->u.vnode;
6773 
6774 	if (vnode != NULL) {
6775 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6776 		put_vnode(vnode);
6777 	}
6778 }
6779 
6780 
6781 static status_t
6782 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6783 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6784 {
6785 	struct vnode* vnode = descriptor->u.vnode;
6786 
6787 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6788 
6789 	if (HAS_FS_CALL(vnode, read_attr_dir))
6790 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6791 			bufferSize, _count);
6792 
6793 	return B_UNSUPPORTED;
6794 }
6795 
6796 
6797 static status_t
6798 attr_dir_rewind(struct file_descriptor* descriptor)
6799 {
6800 	struct vnode* vnode = descriptor->u.vnode;
6801 
6802 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6803 
6804 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6805 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6806 
6807 	return B_UNSUPPORTED;
6808 }
6809 
6810 
6811 static int
6812 attr_create(int fd, char* path, const char* name, uint32 type,
6813 	int openMode, bool kernel)
6814 {
6815 	if (name == NULL || *name == '\0')
6816 		return B_BAD_VALUE;
6817 
6818 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6819 	VnodePutter vnode;
6820 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6821 		kernel);
6822 	if (status != B_OK)
6823 		return status;
6824 
6825 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6826 		return B_LINK_LIMIT;
6827 
6828 	if (!HAS_FS_CALL(vnode, create_attr))
6829 		return B_READ_ONLY_DEVICE;
6830 
6831 	void* cookie;
6832 	status = FS_CALL(vnode.Get(), create_attr, name, type, openMode, &cookie);
6833 	if (status != B_OK)
6834 		return status;
6835 
6836 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6837 	if (fd >= 0) {
6838 		vnode.Detach();
6839 		return fd;
6840 	}
6841 
6842 	status = fd;
6843 
6844 	FS_CALL(vnode.Get(), close_attr, cookie);
6845 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6846 
6847 	FS_CALL(vnode.Get(), remove_attr, name);
6848 
6849 	return status;
6850 }
6851 
6852 
6853 static int
6854 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6855 {
6856 	if (name == NULL || *name == '\0')
6857 		return B_BAD_VALUE;
6858 
6859 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6860 	VnodePutter vnode;
6861 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6862 		kernel);
6863 	if (status != B_OK)
6864 		return status;
6865 
6866 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6867 		return B_LINK_LIMIT;
6868 
6869 	if (!HAS_FS_CALL(vnode, open_attr))
6870 		return B_UNSUPPORTED;
6871 
6872 	void* cookie;
6873 	status = FS_CALL(vnode.Get(), open_attr, name, openMode, &cookie);
6874 	if (status != B_OK)
6875 		return status;
6876 
6877 	// now we only need a file descriptor for this attribute and we're done
6878 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6879 	if (fd >= 0) {
6880 		vnode.Detach();
6881 		return fd;
6882 	}
6883 
6884 	status = fd;
6885 
6886 	FS_CALL(vnode.Get(), close_attr, cookie);
6887 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6888 
6889 	return status;
6890 }
6891 
6892 
6893 static status_t
6894 attr_close(struct file_descriptor* descriptor)
6895 {
6896 	struct vnode* vnode = descriptor->u.vnode;
6897 
6898 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6899 
6900 	if (HAS_FS_CALL(vnode, close_attr))
6901 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6902 
6903 	return B_OK;
6904 }
6905 
6906 
6907 static void
6908 attr_free_fd(struct file_descriptor* descriptor)
6909 {
6910 	struct vnode* vnode = descriptor->u.vnode;
6911 
6912 	if (vnode != NULL) {
6913 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6914 		put_vnode(vnode);
6915 	}
6916 }
6917 
6918 
6919 static status_t
6920 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6921 	size_t* length)
6922 {
6923 	struct vnode* vnode = descriptor->u.vnode;
6924 
6925 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6926 		pos, length, *length));
6927 
6928 	if (!HAS_FS_CALL(vnode, read_attr))
6929 		return B_UNSUPPORTED;
6930 
6931 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6932 }
6933 
6934 
6935 static status_t
6936 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6937 	size_t* length)
6938 {
6939 	struct vnode* vnode = descriptor->u.vnode;
6940 
6941 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6942 		length));
6943 
6944 	if (!HAS_FS_CALL(vnode, write_attr))
6945 		return B_UNSUPPORTED;
6946 
6947 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6948 }
6949 
6950 
6951 static off_t
6952 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6953 {
6954 	off_t offset;
6955 
6956 	switch (seekType) {
6957 		case SEEK_SET:
6958 			offset = 0;
6959 			break;
6960 		case SEEK_CUR:
6961 			offset = descriptor->pos;
6962 			break;
6963 		case SEEK_END:
6964 		{
6965 			struct vnode* vnode = descriptor->u.vnode;
6966 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6967 				return B_UNSUPPORTED;
6968 
6969 			struct stat stat;
6970 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6971 				&stat);
6972 			if (status != B_OK)
6973 				return status;
6974 
6975 			offset = stat.st_size;
6976 			break;
6977 		}
6978 		default:
6979 			return B_BAD_VALUE;
6980 	}
6981 
6982 	// assumes off_t is 64 bits wide
6983 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6984 		return B_BUFFER_OVERFLOW;
6985 
6986 	pos += offset;
6987 	if (pos < 0)
6988 		return B_BAD_VALUE;
6989 
6990 	return descriptor->pos = pos;
6991 }
6992 
6993 
6994 static status_t
6995 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6996 {
6997 	struct vnode* vnode = descriptor->u.vnode;
6998 
6999 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7000 
7001 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7002 		return B_UNSUPPORTED;
7003 
7004 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7005 }
7006 
7007 
7008 static status_t
7009 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7010 	int statMask)
7011 {
7012 	struct vnode* vnode = descriptor->u.vnode;
7013 
7014 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7015 
7016 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7017 		return B_READ_ONLY_DEVICE;
7018 
7019 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7020 }
7021 
7022 
7023 static status_t
7024 attr_remove(int fd, const char* name, bool kernel)
7025 {
7026 	if (name == NULL || *name == '\0')
7027 		return B_BAD_VALUE;
7028 
7029 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7030 		kernel));
7031 
7032 	struct vnode* vnode;
7033 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
7034 	if (!descriptor.IsSet())
7035 		return B_FILE_ERROR;
7036 
7037 	status_t status;
7038 	if (HAS_FS_CALL(vnode, remove_attr))
7039 		status = FS_CALL(vnode, remove_attr, name);
7040 	else
7041 		status = B_READ_ONLY_DEVICE;
7042 
7043 	return status;
7044 }
7045 
7046 
7047 static status_t
7048 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7049 	bool kernel)
7050 {
7051 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7052 		|| *toName == '\0')
7053 		return B_BAD_VALUE;
7054 
7055 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7056 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7057 
7058 	struct vnode* fromVnode;
7059 	FileDescriptorPutter fromDescriptor(get_fd_and_vnode(fromFD, &fromVnode, kernel));
7060 	if (!fromDescriptor.IsSet())
7061 		return B_FILE_ERROR;
7062 
7063 	struct vnode* toVnode;
7064 	FileDescriptorPutter toDescriptor(get_fd_and_vnode(toFD, &toVnode, kernel));
7065 	if (!toDescriptor.IsSet())
7066 		return B_FILE_ERROR;
7067 
7068 	// are the files on the same volume?
7069 	if (fromVnode->device != toVnode->device)
7070 		return B_CROSS_DEVICE_LINK;
7071 
7072 	status_t status;
7073 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7074 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7075 	} else
7076 		status = B_READ_ONLY_DEVICE;
7077 
7078 	return status;
7079 }
7080 
7081 
7082 static int
7083 index_dir_open(dev_t mountID, bool kernel)
7084 {
7085 	struct fs_mount* mount;
7086 	void* cookie;
7087 
7088 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7089 		kernel));
7090 
7091 	status_t status = get_mount(mountID, &mount);
7092 	if (status != B_OK)
7093 		return status;
7094 
7095 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7096 		status = B_UNSUPPORTED;
7097 		goto error;
7098 	}
7099 
7100 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7101 	if (status != B_OK)
7102 		goto error;
7103 
7104 	// get fd for the index directory
7105 	int fd;
7106 	fd = get_new_fd(&sIndexDirectoryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7107 	if (fd >= 0)
7108 		return fd;
7109 
7110 	// something went wrong
7111 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7112 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7113 
7114 	status = fd;
7115 
7116 error:
7117 	put_mount(mount);
7118 	return status;
7119 }
7120 
7121 
7122 static status_t
7123 index_dir_close(struct file_descriptor* descriptor)
7124 {
7125 	struct fs_mount* mount = descriptor->u.mount;
7126 
7127 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7128 
7129 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7130 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7131 
7132 	return B_OK;
7133 }
7134 
7135 
7136 static void
7137 index_dir_free_fd(struct file_descriptor* descriptor)
7138 {
7139 	struct fs_mount* mount = descriptor->u.mount;
7140 
7141 	if (mount != NULL) {
7142 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7143 		put_mount(mount);
7144 	}
7145 }
7146 
7147 
7148 static status_t
7149 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7150 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7151 {
7152 	struct fs_mount* mount = descriptor->u.mount;
7153 
7154 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7155 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7156 			bufferSize, _count);
7157 	}
7158 
7159 	return B_UNSUPPORTED;
7160 }
7161 
7162 
7163 static status_t
7164 index_dir_rewind(struct file_descriptor* descriptor)
7165 {
7166 	struct fs_mount* mount = descriptor->u.mount;
7167 
7168 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7169 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7170 
7171 	return B_UNSUPPORTED;
7172 }
7173 
7174 
7175 static status_t
7176 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7177 	bool kernel)
7178 {
7179 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7180 		mountID, name, kernel));
7181 
7182 	struct fs_mount* mount;
7183 	status_t status = get_mount(mountID, &mount);
7184 	if (status != B_OK)
7185 		return status;
7186 
7187 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7188 		status = B_READ_ONLY_DEVICE;
7189 		goto out;
7190 	}
7191 
7192 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7193 
7194 out:
7195 	put_mount(mount);
7196 	return status;
7197 }
7198 
7199 
7200 #if 0
7201 static status_t
7202 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7203 {
7204 	struct vnode* vnode = descriptor->u.vnode;
7205 
7206 	// ToDo: currently unused!
7207 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7208 	if (!HAS_FS_CALL(vnode, read_index_stat))
7209 		return B_UNSUPPORTED;
7210 
7211 	return B_UNSUPPORTED;
7212 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7213 }
7214 
7215 
7216 static void
7217 index_free_fd(struct file_descriptor* descriptor)
7218 {
7219 	struct vnode* vnode = descriptor->u.vnode;
7220 
7221 	if (vnode != NULL) {
7222 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7223 		put_vnode(vnode);
7224 	}
7225 }
7226 #endif
7227 
7228 
7229 static status_t
7230 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7231 	bool kernel)
7232 {
7233 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7234 		mountID, name, kernel));
7235 
7236 	struct fs_mount* mount;
7237 	status_t status = get_mount(mountID, &mount);
7238 	if (status != B_OK)
7239 		return status;
7240 
7241 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7242 		status = B_UNSUPPORTED;
7243 		goto out;
7244 	}
7245 
7246 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7247 
7248 out:
7249 	put_mount(mount);
7250 	return status;
7251 }
7252 
7253 
7254 static status_t
7255 index_remove(dev_t mountID, const char* name, bool kernel)
7256 {
7257 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7258 		mountID, name, kernel));
7259 
7260 	struct fs_mount* mount;
7261 	status_t status = get_mount(mountID, &mount);
7262 	if (status != B_OK)
7263 		return status;
7264 
7265 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7266 		status = B_READ_ONLY_DEVICE;
7267 		goto out;
7268 	}
7269 
7270 	status = FS_MOUNT_CALL(mount, remove_index, name);
7271 
7272 out:
7273 	put_mount(mount);
7274 	return status;
7275 }
7276 
7277 
7278 /*!	TODO: the query FS API is still pretty much the same as in R5.
7279 		It would be nice if the file systems got some more kernel support
7280 		for queries.
7281 		For example, query parsing should be moved into the kernel.
7282 */
7283 static int
7284 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7285 	int32 token, bool kernel)
7286 {
7287 	struct fs_mount* mount;
7288 	void* cookie;
7289 
7290 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7291 		device, query, kernel));
7292 
7293 	status_t status = get_mount(device, &mount);
7294 	if (status != B_OK)
7295 		return status;
7296 
7297 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7298 		status = B_UNSUPPORTED;
7299 		goto error;
7300 	}
7301 
7302 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7303 		&cookie);
7304 	if (status != B_OK)
7305 		goto error;
7306 
7307 	// get fd for the query
7308 	int fd;
7309 	fd = get_new_fd(&sQueryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7310 	if (fd >= 0)
7311 		return fd;
7312 
7313 	status = fd;
7314 
7315 	// something went wrong
7316 	FS_MOUNT_CALL(mount, close_query, cookie);
7317 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7318 
7319 error:
7320 	put_mount(mount);
7321 	return status;
7322 }
7323 
7324 
7325 static status_t
7326 query_close(struct file_descriptor* descriptor)
7327 {
7328 	struct fs_mount* mount = descriptor->u.mount;
7329 
7330 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7331 
7332 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7333 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7334 
7335 	return B_OK;
7336 }
7337 
7338 
7339 static void
7340 query_free_fd(struct file_descriptor* descriptor)
7341 {
7342 	struct fs_mount* mount = descriptor->u.mount;
7343 
7344 	if (mount != NULL) {
7345 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7346 		put_mount(mount);
7347 	}
7348 }
7349 
7350 
7351 static status_t
7352 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7353 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7354 {
7355 	struct fs_mount* mount = descriptor->u.mount;
7356 
7357 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7358 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7359 			bufferSize, _count);
7360 	}
7361 
7362 	return B_UNSUPPORTED;
7363 }
7364 
7365 
7366 static status_t
7367 query_rewind(struct file_descriptor* descriptor)
7368 {
7369 	struct fs_mount* mount = descriptor->u.mount;
7370 
7371 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7372 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7373 
7374 	return B_UNSUPPORTED;
7375 }
7376 
7377 
7378 //	#pragma mark - General File System functions
7379 
7380 
7381 static dev_t
7382 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7383 	const char* args, bool kernel)
7384 {
7385 	struct ::fs_mount* mount;
7386 	status_t status = B_OK;
7387 	fs_volume* volume = NULL;
7388 	int32 layer = 0;
7389 	Vnode* coveredNode = NULL;
7390 
7391 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7392 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7393 
7394 	// The path is always safe, we just have to make sure that fsName is
7395 	// at least minimally valid - we can't make any assumptions about args, though.
7396 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7397 	// We'll get it from the DDM later.
7398 	if (fsName == NULL) {
7399 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7400 			return B_BAD_VALUE;
7401 	} else if (fsName[0] == '\0')
7402 		return B_BAD_VALUE;
7403 
7404 	RecursiveLocker mountOpLocker(sMountOpLock);
7405 
7406 	// Helper to delete a newly created file device on failure.
7407 	// Not exactly beautiful, but helps to keep the code below cleaner.
7408 	struct FileDeviceDeleter {
7409 		FileDeviceDeleter() : id(-1) {}
7410 		~FileDeviceDeleter()
7411 		{
7412 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7413 		}
7414 
7415 		partition_id id;
7416 	} fileDeviceDeleter;
7417 
7418 	// If the file system is not a "virtual" one, the device argument should
7419 	// point to a real file/device (if given at all).
7420 	// get the partition
7421 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7422 	KPartition* partition = NULL;
7423 	KPath normalizedDevice;
7424 	bool newlyCreatedFileDevice = false;
7425 
7426 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7427 		// normalize the device path
7428 		status = normalizedDevice.SetTo(device, true);
7429 		if (status != B_OK)
7430 			return status;
7431 
7432 		// get a corresponding partition from the DDM
7433 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7434 		if (partition == NULL) {
7435 			// Partition not found: This either means the user supplied
7436 			// an invalid path, or the path refers to an image file. We try
7437 			// to let the DDM create a file device for the path.
7438 			partition_id deviceID = ddm->CreateFileDevice(
7439 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7440 			if (deviceID >= 0) {
7441 				partition = ddm->RegisterPartition(deviceID);
7442 				if (newlyCreatedFileDevice)
7443 					fileDeviceDeleter.id = deviceID;
7444 			}
7445 		}
7446 
7447 		if (!partition) {
7448 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7449 				normalizedDevice.Path()));
7450 			return B_ENTRY_NOT_FOUND;
7451 		}
7452 
7453 		device = normalizedDevice.Path();
7454 			// correct path to file device
7455 	}
7456 	PartitionRegistrar partitionRegistrar(partition, true);
7457 
7458 	// Write lock the partition's device. For the time being, we keep the lock
7459 	// until we're done mounting -- not nice, but it ensures that no one
7460 	// is interfering.
7461 	// TODO: Just mark the partition busy while mounting!
7462 	KDiskDevice* diskDevice = NULL;
7463 	if (partition) {
7464 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7465 		if (!diskDevice) {
7466 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7467 			return B_ERROR;
7468 		}
7469 	}
7470 
7471 	DeviceWriteLocker writeLocker(diskDevice, true);
7472 		// this takes over the write lock acquired before
7473 
7474 	if (partition != NULL) {
7475 		// make sure that the partition is not busy
7476 		if (partition->IsBusy()) {
7477 			TRACE(("fs_mount(): Partition is busy.\n"));
7478 			return B_BUSY;
7479 		}
7480 
7481 		// if no FS name had been supplied, we get it from the partition
7482 		if (fsName == NULL) {
7483 			KDiskSystem* diskSystem = partition->DiskSystem();
7484 			if (!diskSystem) {
7485 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7486 					"recognize it.\n"));
7487 				return B_BAD_VALUE;
7488 			}
7489 
7490 			if (!diskSystem->IsFileSystem()) {
7491 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7492 					"partitioning system.\n"));
7493 				return B_BAD_VALUE;
7494 			}
7495 
7496 			// The disk system name will not change, and the KDiskSystem
7497 			// object will not go away while the disk device is locked (and
7498 			// the partition has a reference to it), so this is safe.
7499 			fsName = diskSystem->Name();
7500 		}
7501 	}
7502 
7503 	mount = new(std::nothrow) (struct ::fs_mount);
7504 	if (mount == NULL)
7505 		return B_NO_MEMORY;
7506 
7507 	mount->device_name = strdup(device);
7508 		// "device" can be NULL
7509 
7510 	status = mount->entry_cache.Init();
7511 	if (status != B_OK)
7512 		goto err1;
7513 
7514 	// initialize structure
7515 	mount->id = sNextMountID++;
7516 	mount->partition = NULL;
7517 	mount->root_vnode = NULL;
7518 	mount->covers_vnode = NULL;
7519 	mount->unmounting = false;
7520 	mount->owns_file_device = false;
7521 	mount->volume = NULL;
7522 
7523 	// build up the volume(s)
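	// Each iteration stacks one layer: the new volume records the previously
	// built volume as its super_volume, and mount->volume always points to
	// the most recently added (topmost) layer.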
7524 	while (true) {
7525 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7526 		if (layerFSName == NULL) {
7527 			if (layer == 0) {
7528 				status = B_NO_MEMORY;
7529 				goto err1;
7530 			}
7531 
7532 			break;
7533 		}
7534 		MemoryDeleter layerFSNameDeleter(layerFSName);
7535 
7536 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7537 		if (volume == NULL) {
7538 			status = B_NO_MEMORY;
7539 			goto err1;
7540 		}
7541 
7542 		volume->id = mount->id;
7543 		volume->partition = partition != NULL ? partition->ID() : -1;
7544 		volume->layer = layer++;
7545 		volume->private_volume = NULL;
7546 		volume->ops = NULL;
7547 		volume->sub_volume = NULL;
7548 		volume->super_volume = NULL;
7549 		volume->file_system = NULL;
7550 		volume->file_system_name = NULL;
7551 
7552 		volume->file_system_name = get_file_system_name(layerFSName);
7553 		if (volume->file_system_name == NULL) {
7554 			status = B_NO_MEMORY;
7555 			free(volume);
7556 			goto err1;
7557 		}
7558 
7559 		volume->file_system = get_file_system(layerFSName);
7560 		if (volume->file_system == NULL) {
7561 			status = B_DEVICE_NOT_FOUND;
7562 			free(volume->file_system_name);
7563 			free(volume);
7564 			goto err1;
7565 		}
7566 
7567 		if (mount->volume == NULL)
7568 			mount->volume = volume;
7569 		else {
7570 			volume->super_volume = mount->volume;
7571 			mount->volume->sub_volume = volume;
7572 			mount->volume = volume;
7573 		}
7574 	}
7575 
7576 	// insert mount struct into list before we call FS's mount() function
7577 	// so that vnodes can be created for this mount
7578 	rw_lock_write_lock(&sMountLock);
7579 	sMountsTable->Insert(mount);
7580 	rw_lock_write_unlock(&sMountLock);
7581 
7582 	ino_t rootID;
7583 
7584 	if (!sRoot) {
7585 		// we haven't mounted anything yet
7586 		if (strcmp(path, "/") != 0) {
7587 			status = B_ERROR;
7588 			goto err2;
7589 		}
7590 
7591 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7592 			args, &rootID);
7593 		if (status != B_OK || mount->volume->ops == NULL)
7594 			goto err2;
7595 	} else {
7596 		{
7597 			VnodePutter temp;
7598 			status = path_to_vnode(path, true, temp, NULL, kernel);
7599 			coveredNode = temp.Detach();
7600 		}
7601 		if (status != B_OK)
7602 			goto err2;
7603 
7604 		mount->covers_vnode = coveredNode;
7605 
7606 		// make sure covered_vnode is a directory
7607 		if (!S_ISDIR(coveredNode->Type())) {
7608 			status = B_NOT_A_DIRECTORY;
7609 			goto err3;
7610 		}
7611 
7612 		if (coveredNode->IsCovered()) {
7613 			// this is already a covered vnode
7614 			status = B_BUSY;
7615 			goto err3;
7616 		}
7617 
7618 		// mount it/them
7619 		fs_volume* volume = mount->volume;
7620 		while (volume) {
7621 			status = volume->file_system->mount(volume, device, flags, args,
7622 				&rootID);
7623 			if (status != B_OK || volume->ops == NULL) {
7624 				if (status == B_OK && volume->ops == NULL)
7625 					panic("fs_mount: mount() succeeded but ops is NULL!");
7626 				if (volume->sub_volume)
7627 					goto err4;
7628 				goto err3;
7629 			}
7630 
7631 			volume = volume->super_volume;
7632 		}
7633 
7634 		volume = mount->volume;
7635 		while (volume) {
7636 			if (volume->ops->all_layers_mounted != NULL)
7637 				volume->ops->all_layers_mounted(volume);
7638 			volume = volume->super_volume;
7639 		}
7640 	}
7641 
7642 	// the root node is supposed to be owned by the file system - it must
7643 	// exist at this point
7644 	rw_lock_write_lock(&sVnodeLock);
7645 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7646 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7647 		panic("fs_mount: file system does not own its root node!\n");
7648 		status = B_ERROR;
7649 		rw_lock_write_unlock(&sVnodeLock);
7650 		goto err4;
7651 	}
7652 
7653 	// set up the links between the root vnode and the vnode it covers
7654 	if (coveredNode != NULL) {
7655 		if (coveredNode->IsCovered()) {
7656 			// the vnode is covered now
7657 			status = B_BUSY;
7658 			rw_lock_write_unlock(&sVnodeLock);
7659 			goto err4;
7660 		}
7661 
7662 		mount->root_vnode->covers = coveredNode;
7663 		mount->root_vnode->SetCovering(true);
7664 
7665 		coveredNode->covered_by = mount->root_vnode;
7666 		coveredNode->SetCovered(true);
7667 	}
7668 	rw_lock_write_unlock(&sVnodeLock);
7669 
7670 	if (!sRoot) {
7671 		sRoot = mount->root_vnode;
7672 		mutex_lock(&sIOContextRootLock);
7673 		get_current_io_context(true)->root = sRoot;
7674 		mutex_unlock(&sIOContextRootLock);
7675 		inc_vnode_ref_count(sRoot);
7676 	}
7677 
7678 	// supply the partition (if any) with the mount cookie and mark it mounted
7679 	if (partition) {
7680 		partition->SetMountCookie(mount->volume->private_volume);
7681 		partition->SetVolumeID(mount->id);
7682 
7683 		// keep a partition reference as long as the partition is mounted
7684 		partitionRegistrar.Detach();
7685 		mount->partition = partition;
7686 		mount->owns_file_device = newlyCreatedFileDevice;
7687 		fileDeviceDeleter.id = -1;
7688 	}
7689 
7690 	notify_mount(mount->id,
7691 		coveredNode != NULL ? coveredNode->device : -1,
7692 		coveredNode ? coveredNode->id : -1);
7693 
7694 	return mount->id;
7695 
7696 err4:
7697 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7698 err3:
7699 	if (coveredNode != NULL)
7700 		put_vnode(coveredNode);
7701 err2:
7702 	rw_lock_write_lock(&sMountLock);
7703 	sMountsTable->Remove(mount);
7704 	rw_lock_write_unlock(&sMountLock);
7705 err1:
7706 	delete mount;
7707 
7708 	return status;
7709 }
7710 
7711 
7712 static status_t
7713 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7714 {
7715 	struct fs_mount* mount;
7716 	status_t err;
7717 
7718 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7719 		mountID, kernel));
7720 
7721 	VnodePutter pathVnode;
7722 	if (path != NULL) {
7723 		err = path_to_vnode(path, true, pathVnode, NULL, kernel);
7724 		if (err != B_OK)
7725 			return B_ENTRY_NOT_FOUND;
7726 	}
7727 
7728 	RecursiveLocker mountOpLocker(sMountOpLock);
7729 	ReadLocker mountLocker(sMountLock);
7730 
7731 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7732 	if (mount == NULL) {
7733 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7734 			pathVnode.Get());
7735 	}
7736 
7737 	mountLocker.Unlock();
7738 
7739 	if (path != NULL) {
7740 		if (mount->root_vnode != pathVnode.Get()) {
7741 			// not mountpoint
7742 			return B_BAD_VALUE;
7743 		}
7744 
7745 		pathVnode.Unset();
7746 	}
7747 
7748 	// if the volume is associated with a partition, lock the device of the
7749 	// partition as long as we are unmounting
7750 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7751 	KPartition* partition = mount->partition;
7752 	KDiskDevice* diskDevice = NULL;
7753 	if (partition != NULL) {
7754 		if (partition->Device() == NULL) {
7755 			dprintf("fs_unmount(): There is no device!\n");
7756 			return B_ERROR;
7757 		}
7758 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7759 		if (!diskDevice) {
7760 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7761 			return B_ERROR;
7762 		}
7763 	}
7764 	DeviceWriteLocker writeLocker(diskDevice, true);
7765 
7766 	// make sure that the partition is not busy
7767 	if (partition != NULL) {
7768 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7769 			dprintf("fs_unmount(): Partition is busy.\n");
7770 			return B_BUSY;
7771 		}
7772 	}
7773 
7774 	// grab the vnode master mutex to keep someone from creating
7775 	// a vnode while we're figuring out if we can continue
7776 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7777 
7778 	bool disconnectedDescriptors = false;
7779 
7780 	while (true) {
7781 		bool busy = false;
7782 
7783 		// cycle through the list of vnodes associated with this mount and
7784 		// make sure none of them is busy or still referenced
7785 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7786 		while (struct vnode* vnode = iterator.Next()) {
7787 			if (vnode->IsBusy()) {
7788 				dprintf("fs_unmount(): inode %" B_PRIdINO " is busy\n", vnode->id);
7789 				busy = true;
7790 				break;
7791 			}
7792 
7793 			// check the vnode's ref count -- subtract additional references for
7794 			// covering
7795 			int32 refCount = vnode->ref_count;
7796 			if (vnode->covers != NULL)
7797 				refCount--;
7798 			if (vnode->covered_by != NULL)
7799 				refCount--;
7800 
7801 			if (refCount != 0) {
7802 				dprintf("fs_unmount(): inode %" B_PRIdINO " is still referenced\n", vnode->id);
7803 				// there are still vnodes in use on this mount, so we cannot
7804 				// unmount yet
7805 				busy = true;
7806 				break;
7807 			}
7808 		}
7809 
7810 		if (!busy)
7811 			break;
7812 
7813 		if ((flags & B_FORCE_UNMOUNT) == 0)
7814 			return B_BUSY;
7815 
7816 		if (disconnectedDescriptors) {
7817 			// wait a bit until the last access is finished, and then try again
7818 			vnodesWriteLocker.Unlock();
7819 			snooze(100000);
7820 			// TODO: if there is some kind of bug that prevents the ref counts
7821 			// from getting back to zero, this will fall into an endless loop...
7822 			vnodesWriteLocker.Lock();
7823 			continue;
7824 		}
7825 
7826 		// the file system is still busy - but we're forced to unmount it,
7827 		// so let's disconnect all open file descriptors
7828 
7829 		mount->unmounting = true;
7830 			// prevent new vnodes from being created
7831 
7832 		vnodesWriteLocker.Unlock();
7833 
7834 		disconnect_mount_or_vnode_fds(mount, NULL);
7835 		disconnectedDescriptors = true;
7836 
7837 		vnodesWriteLocker.Lock();
7838 	}
7839 
7840 	// We can safely continue. Mark all of the vnodes busy and this mount
7841 	// structure in unmounting state. Also undo the vnode covers/covered_by
7842 	// links.
7843 	mount->unmounting = true;
7844 
7845 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7846 	while (struct vnode* vnode = iterator.Next()) {
7847 		// Remove all covers/covered_by links from other mounts' nodes to this
7848 		// vnode and adjust the node ref count accordingly. We will release the
7849 		// references to the external vnodes below.
7850 		if (Vnode* coveredNode = vnode->covers) {
7851 			if (Vnode* coveringNode = vnode->covered_by) {
7852 				// We have both covered and covering vnodes, so just remove us
7853 				// from the chain.
7854 				coveredNode->covered_by = coveringNode;
7855 				coveringNode->covers = coveredNode;
7856 				vnode->ref_count -= 2;
7857 
7858 				vnode->covered_by = NULL;
7859 				vnode->covers = NULL;
7860 				vnode->SetCovering(false);
7861 				vnode->SetCovered(false);
7862 			} else {
7863 				// We only have a covered vnode. Remove its link to us.
7864 				coveredNode->covered_by = NULL;
7865 				coveredNode->SetCovered(false);
7866 				vnode->ref_count--;
7867 
7868 				// If the other node is an external vnode, we keep its link
7869 				// around so we can put the reference later on. Otherwise
7870 				// we get rid of it right now.
7871 				if (coveredNode->mount == mount) {
7872 					vnode->covers = NULL;
7873 					coveredNode->ref_count--;
7874 				}
7875 			}
7876 		} else if (Vnode* coveringNode = vnode->covered_by) {
7877 			// We only have a covering vnode. Remove its link to us.
7878 			coveringNode->covers = NULL;
7879 			coveringNode->SetCovering(false);
7880 			vnode->ref_count--;
7881 
7882 			// If the other node is an external vnode, we keep its link
7883 			// around so we can put the reference later on. Otherwise
7884 			// we get rid of it right now.
7885 			if (coveringNode->mount == mount) {
7886 				vnode->covered_by = NULL;
7887 				coveringNode->ref_count--;
7888 			}
7889 		}
7890 
7891 		vnode->SetBusy(true);
7892 		vnode_to_be_freed(vnode);
7893 	}
7894 
7895 	vnodesWriteLocker.Unlock();
7896 
7897 	// Free all vnodes associated with this mount.
7898 	// They will be removed from the mount list by free_vnode(), so
7899 	// we don't have to do that ourselves.
7900 	while (struct vnode* vnode = mount->vnodes.Head()) {
7901 		// Put the references to external covered/covering vnodes we kept above.
7902 		if (Vnode* coveredNode = vnode->covers)
7903 			put_vnode(coveredNode);
7904 		if (Vnode* coveringNode = vnode->covered_by)
7905 			put_vnode(coveringNode);
7906 
7907 		free_vnode(vnode, false);
7908 	}
7909 
7910 	// remove the mount structure from the hash table
7911 	rw_lock_write_lock(&sMountLock);
7912 	sMountsTable->Remove(mount);
7913 	rw_lock_write_unlock(&sMountLock);
7914 
7915 	mountOpLocker.Unlock();
7916 
7917 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7918 	notify_unmount(mount->id);
7919 
7920 	// dereference the partition and mark it unmounted
7921 	if (partition) {
7922 		partition->SetVolumeID(-1);
7923 		partition->SetMountCookie(NULL);
7924 
7925 		if (mount->owns_file_device)
7926 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7927 		partition->Unregister();
7928 	}
7929 
7930 	delete mount;
7931 	return B_OK;
7932 }
7933 
7934 
7935 static status_t
7936 fs_sync(dev_t device)
7937 {
7938 	struct fs_mount* mount;
7939 	status_t status = get_mount(device, &mount);
7940 	if (status != B_OK)
7941 		return status;
7942 
7943 	struct vnode marker;
7944 	memset(&marker, 0, sizeof(marker));
7945 	marker.SetBusy(true);
7946 	marker.SetRemoved(true);
7947 
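	// The marker is a dummy vnode that records our position in the mount's
	// vnode list across iterations. Its "removed" flag doubles as "the marker
	// is currently not linked into the list", and "busy" keeps other code
	// from treating it as a real node.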
7948 	// First, synchronize all file caches
7949 
7950 	while (true) {
7951 		WriteLocker locker(sVnodeLock);
7952 			// Note: That's the easy way. Which is probably OK for sync(),
7953 			// since it's a relatively rare call and doesn't need to allow for
7954 			// a lot of concurrency. Using a read lock would be possible, but
7955 			// also more involved, since we would have to lock the individual
7956 			// nodes and take care of the locking order, which we might not
7957 			// want to do while holding fs_mount::lock.
7958 
7959 		// synchronize access to vnode list
7960 		mutex_lock(&mount->lock);
7961 
7962 		struct vnode* vnode;
7963 		if (!marker.IsRemoved()) {
7964 			vnode = mount->vnodes.GetNext(&marker);
7965 			mount->vnodes.Remove(&marker);
7966 			marker.SetRemoved(true);
7967 		} else
7968 			vnode = mount->vnodes.First();
7969 
7970 		while (vnode != NULL && (vnode->cache == NULL
7971 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7972 			// TODO: we could track writes (and writable mapped vnodes)
7973 			//	and have a simple flag that we could test for here
7974 			vnode = mount->vnodes.GetNext(vnode);
7975 		}
7976 
7977 		if (vnode != NULL) {
7978 			// insert marker vnode again
7979 			mount->vnodes.InsertBefore(mount->vnodes.GetNext(vnode), &marker);
7980 			marker.SetRemoved(false);
7981 		}
7982 
7983 		mutex_unlock(&mount->lock);
7984 
7985 		if (vnode == NULL)
7986 			break;
7987 
7988 		vnode = lookup_vnode(mount->id, vnode->id);
7989 		if (vnode == NULL || vnode->IsBusy())
7990 			continue;
7991 
7992 		if (inc_vnode_ref_count(vnode) == 0) {
7993 			// this vnode has been unused before
7994 			vnode_used(vnode);
7995 		}
7996 
7997 		locker.Unlock();
7998 
7999 		if (vnode->cache != NULL && !vnode->IsRemoved())
8000 			vnode->cache->WriteModified();
8001 
8002 		put_vnode(vnode);
8003 	}
8004 
8005 	// Let the file systems do their synchronizing work
8006 	if (HAS_FS_MOUNT_CALL(mount, sync))
8007 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8008 
8009 	// Finally, flush the underlying device's write cache (if possible).
8010 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8011 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8012 
8013 	put_mount(mount);
8014 	return status;
8015 }
8016 
8017 
8018 static status_t
8019 fs_read_info(dev_t device, struct fs_info* info)
8020 {
8021 	struct fs_mount* mount;
8022 	status_t status = get_mount(device, &mount);
8023 	if (status != B_OK)
8024 		return status;
8025 
8026 	memset(info, 0, sizeof(struct fs_info));
8027 
8028 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8029 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8030 
8031 	// fill in info the file system doesn't (have to) know about
8032 	if (status == B_OK) {
8033 		info->dev = mount->id;
8034 		info->root = mount->root_vnode->id;
8035 
8036 		fs_volume* volume = mount->volume;
8037 		while (volume->super_volume != NULL)
8038 			volume = volume->super_volume;
8039 
8040 		strlcpy(info->fsh_name, volume->file_system_name,
8041 			sizeof(info->fsh_name));
8042 		if (mount->device_name != NULL) {
8043 			strlcpy(info->device_name, mount->device_name,
8044 				sizeof(info->device_name));
8045 		}
8046 	}
8047 
8048 	// if the call is not supported by the file system, there are still
8049 	// the parts that we filled out ourselves
8050 
8051 	put_mount(mount);
8052 	return status;
8053 }
8054 
8055 
8056 static status_t
8057 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8058 {
8059 	struct fs_mount* mount;
8060 	status_t status = get_mount(device, &mount);
8061 	if (status != B_OK)
8062 		return status;
8063 
8064 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8065 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8066 	else
8067 		status = B_READ_ONLY_DEVICE;
8068 
8069 	put_mount(mount);
8070 	return status;
8071 }
8072 
8073 
8074 static dev_t
8075 fs_next_device(int32* _cookie)
8076 {
8077 	struct fs_mount* mount = NULL;
8078 	dev_t device = *_cookie;
8079 
8080 	rw_lock_read_lock(&sMountLock);
8081 
8082 	// Since device IDs are assigned sequentially, this algorithm
8083 	// works well enough. It makes sure that the device list
8084 	// returned is sorted, and that no device is skipped when an
8085 	// already visited device got unmounted.
8086 
8087 	while (device < sNextMountID) {
8088 		mount = find_mount(device++);
8089 		if (mount != NULL && mount->volume->private_volume != NULL)
8090 			break;
8091 	}
8092 
8093 	*_cookie = device;
8094 
8095 	if (mount != NULL)
8096 		device = mount->id;
8097 	else
8098 		device = B_BAD_VALUE;
8099 
8100 	rw_lock_read_unlock(&sMountLock);
8101 
8102 	return device;
8103 }
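

#if 0
// A minimal usage sketch (not compiled): enumerate all mounted volumes via
// the cookie-based iteration implemented above.
static void
example_list_devices()
{
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0)
		dprintf("mounted volume: %" B_PRIdDEV "\n", device);
}
#endif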
8104 
8105 
8106 ssize_t
8107 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8108 	void *buffer, size_t readBytes)
8109 {
8110 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8111 	if (attrFD < 0)
8112 		return attrFD;
8113 
8114 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8115 
8116 	_kern_close(attrFD);
8117 
8118 	return bytesRead;
8119 }
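

#if 0
// Usage sketch (not compiled; the attribute name is illustrative). Note that
// this implementation does not consult the type argument -- the attribute is
// simply opened read-only and read like a file.
static void
example_read_attribute(int fd)
{
	char buffer[B_ATTR_NAME_LENGTH];
	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", 0, 0, buffer,
		sizeof(buffer));
	if (bytesRead < 0)
		dprintf("fs_read_attr failed: %s\n", strerror(bytesRead));
}
#endif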
8120 
8121 
8122 static status_t
8123 get_cwd(char* buffer, size_t size, bool kernel)
8124 {
8125 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8126 
8127 	// Get current working directory from io context
8128 	const struct io_context* context = get_current_io_context(kernel);
8129 	rw_lock_read_lock(&context->lock);
8130 
8131 	struct vnode* vnode = context->cwd;
8132 	if (vnode != NULL)
8133 		inc_vnode_ref_count(vnode);
8134 
8135 	rw_lock_read_unlock(&context->lock);
8136 
8137 	if (vnode == NULL)
8138 		return B_ERROR;
8139 
8140 	status_t status = dir_vnode_to_path(vnode, buffer, size, kernel);
8141 	put_vnode(vnode);
8142 
8143 	return status;
8144 }
8145 
8146 
8147 static status_t
8148 set_cwd(int fd, char* path, bool kernel)
8149 {
8150 	struct io_context* context;
8151 	struct vnode* oldDirectory;
8152 
8153 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8154 
8155 	// Get vnode for passed path, and bail if it failed
8156 	VnodePutter vnode;
8157 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
8158 	if (status < 0)
8159 		return status;
8160 
8161 	if (!S_ISDIR(vnode->Type())) {
8162 		// nope, can't cwd to here
8163 		return B_NOT_A_DIRECTORY;
8164 	}
8165 
8166 	// We need to have the permission to enter the directory, too
8167 	if (HAS_FS_CALL(vnode, access)) {
8168 		status = FS_CALL(vnode.Get(), access, X_OK);
8169 		if (status != B_OK)
8170 			return status;
8171 	}
8172 
8173 	// Get current io context and lock
8174 	context = get_current_io_context(kernel);
8175 	rw_lock_write_lock(&context->lock);
8176 
8177 	// save the old current working directory first
8178 	oldDirectory = context->cwd;
8179 	context->cwd = vnode.Detach();
8180 
8181 	rw_lock_write_unlock(&context->lock);
8182 
8183 	if (oldDirectory)
8184 		put_vnode(oldDirectory);
8185 
8186 	return B_NO_ERROR;
8187 }
8188 
8189 
8190 static status_t
8191 user_copy_name(char* to, const char* from, size_t length)
8192 {
8193 	ssize_t len = user_strlcpy(to, from, length);
8194 	if (len < 0)
8195 		return len;
8196 	if (len >= (ssize_t)length)
8197 		return B_NAME_TOO_LONG;
8198 	return B_OK;
8199 }
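

#if 0
// Usage sketch (not compiled): copy a user-supplied name into a kernel
// buffer, rejecting names that would have been truncated.
static status_t
example_copy_user_name(const char* userName)
{
	char name[B_FILE_NAME_LENGTH];
	return user_copy_name(name, userName, sizeof(name));
}
#endif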
8200 
8201 
8202 //	#pragma mark - kernel mirrored syscalls
8203 
8204 
8205 dev_t
8206 _kern_mount(const char* path, const char* device, const char* fsName,
8207 	uint32 flags, const char* args, size_t argsLength)
8208 {
8209 	KPath pathBuffer(path);
8210 	if (pathBuffer.InitCheck() != B_OK)
8211 		return B_NO_MEMORY;
8212 
8213 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8214 }
8215 
8216 
8217 status_t
8218 _kern_unmount(const char* path, uint32 flags)
8219 {
8220 	KPath pathBuffer(path);
8221 	if (pathBuffer.InitCheck() != B_OK)
8222 		return B_NO_MEMORY;
8223 
8224 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8225 }
8226 
8227 
8228 status_t
8229 _kern_read_fs_info(dev_t device, struct fs_info* info)
8230 {
8231 	if (info == NULL)
8232 		return B_BAD_VALUE;
8233 
8234 	return fs_read_info(device, info);
8235 }
8236 
8237 
8238 status_t
8239 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8240 {
8241 	if (info == NULL)
8242 		return B_BAD_VALUE;
8243 
8244 	return fs_write_info(device, info, mask);
8245 }
8246 
8247 
8248 status_t
8249 _kern_sync(void)
8250 {
8251 	// Note: _kern_sync() is also called from _user_sync()
8252 	int32 cookie = 0;
8253 	dev_t device;
8254 	while ((device = next_dev(&cookie)) >= 0) {
8255 		status_t status = fs_sync(device);
8256 		if (status != B_OK && status != B_BAD_VALUE) {
8257 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8258 				strerror(status));
8259 		}
8260 	}
8261 
8262 	return B_OK;
8263 }
8264 
8265 
8266 dev_t
8267 _kern_next_device(int32* _cookie)
8268 {
8269 	return fs_next_device(_cookie);
8270 }
8271 
8272 
8273 status_t
8274 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8275 	size_t infoSize)
8276 {
8277 	if (infoSize != sizeof(fd_info))
8278 		return B_BAD_VALUE;
8279 
8280 	// get the team
8281 	Team* team = Team::Get(teamID);
8282 	if (team == NULL)
8283 		return B_BAD_TEAM_ID;
8284 	BReference<Team> teamReference(team, true);
8285 
8286 	// now that we have a team reference, its I/O context won't go away
8287 	const io_context* context = team->io_context;
8288 	ReadLocker contextLocker(context->lock);
8289 
8290 	uint32 slot = *_cookie;
8291 
8292 	struct file_descriptor* descriptor;
8293 	while (slot < context->table_size
8294 			&& (descriptor = context->fds[slot]) == NULL) {
8295 		slot++;
8296 	}
8297 
8298 	if (slot >= context->table_size)
8299 		return B_ENTRY_NOT_FOUND;
8300 
8301 	info->number = slot;
8302 	info->open_mode = descriptor->open_mode;
8303 
8304 	struct vnode* vnode = fd_vnode(descriptor);
8305 	if (vnode != NULL) {
8306 		info->device = vnode->device;
8307 		info->node = vnode->id;
8308 	} else if (descriptor->u.mount != NULL) {
8309 		info->device = descriptor->u.mount->id;
8310 		info->node = -1;
8311 	}
8312 
8313 	*_cookie = slot + 1;
8314 	return B_OK;
8315 }
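

#if 0
// Usage sketch (not compiled; assumes a valid teamID): walk all open file
// descriptors of a team with the cookie-based iteration above.
static void
example_dump_team_fds(team_id teamID)
{
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %" B_PRId32 ": device %" B_PRIdDEV ", node %" B_PRIdINO
			"\n", info.number, info.device, info.node);
	}
}
#endif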
8316 
8317 
8318 int
8319 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8320 	int perms)
8321 {
8322 	if ((openMode & O_CREAT) != 0) {
8323 		return file_create_entry_ref(device, inode, name, openMode, perms,
8324 			true);
8325 	}
8326 
8327 	return file_open_entry_ref(device, inode, name, openMode, true);
8328 }
8329 
8330 
8331 /*!	\brief Opens a node specified by a FD + path pair.
8332 
8333 	At least one of \a fd and \a path must be specified.
8334 	If only \a fd is given, the function opens the node identified by this
8335 	FD. If only a path is given, this path is opened. If both are given and
8336 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8337 	of the directory (!) identified by \a fd.
8338 
8339 	\param fd The FD. May be < 0.
8340 	\param path The absolute or relative path. May be \c NULL.
8341 	\param openMode The open mode.
8342 	\return A FD referring to the newly opened node, or an error code,
8343 			if an error occurs.
8344 */
8345 int
8346 _kern_open(int fd, const char* path, int openMode, int perms)
8347 {
8348 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8349 	if (pathBuffer.InitCheck() != B_OK)
8350 		return B_NO_MEMORY;
8351 
8352 	if ((openMode & O_CREAT) != 0)
8353 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8354 
8355 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8356 }
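

#if 0
// Sketch of the FD + path convention documented above (not compiled; the
// paths are illustrative): a relative path is resolved against the directory
// FD, while an absolute path makes the FD irrelevant.
static void
example_open_relative()
{
	int dirFD = _kern_open_dir(-1, "/boot/home");
	if (dirFD < 0)
		return;

	// opens the same node as _kern_open(-1, "/boot/home/settings", ...)
	int fileFD = _kern_open(dirFD, "settings", O_RDONLY, 0);
	if (fileFD >= 0)
		_kern_close(fileFD);
	_kern_close(dirFD);
}
#endif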
8357 
8358 
8359 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8360 
8361 	The supplied name may be \c NULL, in which case directory identified
8362 	by \a device and \a inode will be opened. Otherwise \a device and
8363 	\a inode identify the parent directory of the directory to be opened
8364 	and \a name its entry name.
8365 
8366 	\param device If \a name is specified the ID of the device the parent
8367 		   directory of the directory to be opened resides on, otherwise
8368 		   the device of the directory itself.
8369 	\param inode If \a name is specified the node ID of the parent
8370 		   directory of the directory to be opened, otherwise node ID of the
8371 		   directory itself.
8372 	\param name The entry name of the directory to be opened. If \c NULL,
8373 		   the \a device + \a inode pair identify the node to be opened.
8374 	\return The FD of the newly opened directory or an error code, if
8375 			something went wrong.
8376 */
8377 int
8378 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8379 {
8380 	return dir_open_entry_ref(device, inode, name, true);
8381 }
8382 
8383 
8384 /*!	\brief Opens a directory specified by a FD + path pair.
8385 
8386 	At least one of \a fd and \a path must be specified.
8387 	If only \a fd is given, the function opens the directory identified by this
8388 	FD. If only a path is given, this path is opened. If both are given and
8389 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8390 	of the directory (!) identified by \a fd.
8391 
8392 	\param fd The FD. May be < 0.
8393 	\param path The absolute or relative path. May be \c NULL.
8394 	\return A FD referring to the newly opened directory, or an error code,
8395 			if an error occurs.
8396 */
8397 int
8398 _kern_open_dir(int fd, const char* path)
8399 {
8400 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8401 	if (pathBuffer.InitCheck() != B_OK)
8402 		return B_NO_MEMORY;
8403 
8404 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8405 }
8406 
8407 
8408 status_t
8409 _kern_fcntl(int fd, int op, size_t argument)
8410 {
8411 	return common_fcntl(fd, op, argument, true);
8412 }
8413 
8414 
8415 status_t
8416 _kern_fsync(int fd)
8417 {
8418 	return common_sync(fd, true);
8419 }
8420 
8421 
8422 status_t
8423 _kern_lock_node(int fd)
8424 {
8425 	return common_lock_node(fd, true);
8426 }
8427 
8428 
8429 status_t
8430 _kern_unlock_node(int fd)
8431 {
8432 	return common_unlock_node(fd, true);
8433 }
8434 
8435 
8436 status_t
8437 _kern_preallocate(int fd, off_t offset, off_t length)
8438 {
8439 	return common_preallocate(fd, offset, length, true);
8440 }
8441 
8442 
8443 status_t
8444 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8445 	int perms)
8446 {
8447 	return dir_create_entry_ref(device, inode, name, perms, true);
8448 }
8449 
8450 
8451 /*!	\brief Creates a directory specified by a FD + path pair.
8452 
8453 	\a path must always be specified (it contains the name of the new directory
8454 	at least). If only a path is given, this path identifies the location at
8455 	which the directory shall be created. If both \a fd and \a path are given
8456 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8457 	of the directory (!) identified by \a fd.
8458 
8459 	\param fd The FD. May be < 0.
8460 	\param path The absolute or relative path. Must not be \c NULL.
8461 	\param perms The access permissions the new directory shall have.
8462 	\return \c B_OK, if the directory has been created successfully, another
8463 			error code otherwise.
8464 */
8465 status_t
8466 _kern_create_dir(int fd, const char* path, int perms)
8467 {
8468 	KPath pathBuffer(path, KPath::DEFAULT);
8469 	if (pathBuffer.InitCheck() != B_OK)
8470 		return B_NO_MEMORY;
8471 
8472 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8473 }
8474 
8475 
8476 status_t
8477 _kern_remove_dir(int fd, const char* path)
8478 {
8479 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8480 	if (pathBuffer.InitCheck() != B_OK)
8481 		return B_NO_MEMORY;
8482 
8483 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8484 }
8485 
8486 
8487 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8488 
8489 	At least one of \a fd and \a path must be specified.
8490 	If only \a fd is given, the symlink to be read is the node
8491 	identified by this FD. If only a path is given, this path identifies the
8492 	symlink to be read. If both are given and the path is absolute, \a fd is
8493 	ignored; a relative path is reckoned off of the directory (!) identified
8494 	by \a fd.
8495 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8496 	will still be updated to reflect the required buffer size.
8497 
8498 	\param fd The FD. May be < 0.
8499 	\param path The absolute or relative path. May be \c NULL.
8500 	\param buffer The buffer into which the contents of the symlink shall be
8501 		   written.
8502 	\param _bufferSize A pointer to the size of the supplied buffer.
8503 	\return The length of the link on success or an appropriate error code.
8504 */
8505 status_t
8506 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8507 {
8508 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8509 	if (pathBuffer.InitCheck() != B_OK)
8510 		return B_NO_MEMORY;
8511 
8512 	return common_read_link(fd, pathBuffer.LockBuffer(),
8513 		buffer, _bufferSize, true);
8514 }
8515 
8516 
8517 /*!	\brief Creates a symlink specified by a FD + path pair.
8518 
8519 	\a path must always be specified (it contains the name of the new symlink
8520 	at least). If only a path is given, this path identifies the location at
8521 	which the symlink shall be created. If both \a fd and \a path are given and
8522 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8523 	of the directory (!) identified by \a fd.
8524 
8525 	\param fd The FD. May be < 0.
8526 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the symlink shall point to.
8527 	\param mode The access permissions the new symlink shall have.
8528 	\return \c B_OK, if the symlink has been created successfully, another
8529 			error code otherwise.
8530 */
8531 status_t
8532 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8533 {
8534 	KPath pathBuffer(path);
8535 	if (pathBuffer.InitCheck() != B_OK)
8536 		return B_NO_MEMORY;
8537 
8538 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8539 		toPath, mode, true);
8540 }
8541 
8542 
8543 status_t
8544 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8545 	bool traverseLeafLink)
8546 {
8547 	KPath pathBuffer(path);
8548 	KPath toPathBuffer(toPath);
8549 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8550 		return B_NO_MEMORY;
8551 
8552 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8553 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8554 }
8555 
8556 
8557 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8558 
8559 	\a path must always be specified (it contains at least the name of the entry
8560 	to be deleted). If only a path is given, this path identifies the entry
8561 	directly. If both \a fd and \a path are given and the path is absolute,
8562 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8563 	identified by \a fd.
8564 
8565 	\param fd The FD. May be < 0.
8566 	\param path The absolute or relative path. Must not be \c NULL.
8567 	\return \c B_OK, if the entry has been removed successfully, another
8568 			error code otherwise.
8569 */
8570 status_t
8571 _kern_unlink(int fd, const char* path)
8572 {
8573 	KPath pathBuffer(path);
8574 	if (pathBuffer.InitCheck() != B_OK)
8575 		return B_NO_MEMORY;
8576 
8577 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8578 }
8579 
8580 
8581 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8582 		   by another FD + path pair.
8583 
8584 	\a oldPath and \a newPath must always be specified (they contain at least
8585 	the name of the entry). If only a path is given, this path identifies the
8586 	entry directly. If both a FD and a path are given and the path is absolute,
8587 	the FD is ignored; a relative path is reckoned off of the directory (!)
8588 	identified by the respective FD.
8589 
8590 	\param oldFD The FD of the old location. May be < 0.
8591 	\param oldPath The absolute or relative path of the old location. Must not
8592 		   be \c NULL.
8593 	\param newFD The FD of the new location. May be < 0.
8594 	\param newPath The absolute or relative path of the new location. Must not
8595 		   be \c NULL.
8596 	\return \c B_OK, if the entry has been moved successfully, another
8597 			error code otherwise.
8598 */
8599 status_t
8600 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8601 {
8602 	KPath oldPathBuffer(oldPath);
8603 	KPath newPathBuffer(newPath);
8604 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8605 		return B_NO_MEMORY;
8606 
8607 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8608 		newFD, newPathBuffer.LockBuffer(), true);
8609 }
8610 
8611 
8612 status_t
8613 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8614 {
8615 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8616 	if (pathBuffer.InitCheck() != B_OK)
8617 		return B_NO_MEMORY;
8618 
8619 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8620 		true);
8621 }
8622 
8623 
8624 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8625 
8626 	If only \a fd is given, the stat operation associated with the type
8627 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8628 	given, this path identifies the entry for whose node to retrieve the
8629 	stat data. If both \a fd and \a path are given and the path is absolute,
8630 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8631 	identified by \a fd and specifies the entry whose stat data shall be
8632 	retrieved.
8633 
8634 	\param fd The FD. May be < 0.
8635 	\param path The absolute or relative path. May be \c NULL.
8636 	\param traverseLeafLink If \a path is given, \c true specifies that the
8637 		   function shall not stick to symlinks, but traverse them.
8638 	\param stat The buffer the stat data shall be written into.
8639 	\param statSize The size of the supplied stat buffer.
8640 	\return \c B_OK, if the stat data have been read successfully, another
8641 			error code otherwise.
8642 */
8643 status_t
8644 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8645 	struct stat* stat, size_t statSize)
8646 {
8647 	struct stat completeStat;
8648 	struct stat* originalStat = NULL;
8649 	status_t status;
8650 
8651 	if (statSize > sizeof(struct stat))
8652 		return B_BAD_VALUE;
8653 
8654 	// this supports different stat extensions
8655 	if (statSize < sizeof(struct stat)) {
8656 		originalStat = stat;
8657 		stat = &completeStat;
8658 	}
8659 
8660 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8661 
8662 	if (status == B_OK && originalStat != NULL)
8663 		memcpy(originalStat, stat, statSize);
8664 
8665 	return status;
8666 }
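

#if 0
// Usage sketch (not compiled; the reduced size is purely illustrative):
// a caller built against an older, smaller struct stat passes its own
// sizeof(). The code above then stats into a complete buffer and copies
// back only the first statSize bytes, so newer trailing fields are
// silently dropped for legacy callers.
static status_t
example_legacy_read_stat(int fd, struct stat* legacyStat, size_t legacySize)
{
	// legacySize < sizeof(struct stat) for an old caller
	return _kern_read_stat(fd, NULL, false, legacyStat, legacySize);
}
#endif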
8667 
8668 
8669 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8670 
8671 	If only \a fd is given, the stat operation associated with the type
8672 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8673 	given, this path identifies the entry for whose node to write the
8674 	stat data. If both \a fd and \a path are given and the path is absolute,
8675 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8676 	identified by \a fd and specifies the entry whose stat data shall be
8677 	written.
8678 
8679 	\param fd The FD. May be < 0.
8680 	\param path The absolute or relative path. May be \c NULL.
8681 	\param traverseLeafLink If \a path is given, \c true specifies that the
8682 		   function shall not stick to symlinks, but traverse them.
8683 	\param stat The buffer containing the stat data to be written.
8684 	\param statSize The size of the supplied stat buffer.
8685 	\param statMask A mask specifying which parts of the stat data shall be
8686 		   written.
8687 	\return \c B_OK, if the stat data have been written successfully,
8688 			another error code otherwise.
8689 */
8690 status_t
8691 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8692 	const struct stat* stat, size_t statSize, int statMask)
8693 {
8694 	struct stat completeStat;
8695 
8696 	if (statSize > sizeof(struct stat))
8697 		return B_BAD_VALUE;
8698 
8699 	// this supports different stat extensions
8700 	if (statSize < sizeof(struct stat)) {
8701 		memset((uint8*)&completeStat + statSize, 0,
8702 			sizeof(struct stat) - statSize);
8703 		memcpy(&completeStat, stat, statSize);
8704 		stat = &completeStat;
8705 	}
8706 
8707 	status_t status;
8708 
8709 	if (path != NULL) {
8710 		// path given: write the stat of the node referred to by (fd, path)
8711 		KPath pathBuffer(path);
8712 		if (pathBuffer.InitCheck() != B_OK)
8713 			return B_NO_MEMORY;
8714 
8715 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8716 			traverseLeafLink, stat, statMask, true);
8717 	} else {
8718 		// no path given: get the FD and use the FD operation
8719 		FileDescriptorPutter descriptor
8720 			(get_fd(get_current_io_context(true), fd));
8721 		if (!descriptor.IsSet())
8722 			return B_FILE_ERROR;
8723 
8724 		if (descriptor->ops->fd_write_stat)
8725 			status = descriptor->ops->fd_write_stat(descriptor.Get(), stat, statMask);
8726 		else
8727 			status = B_UNSUPPORTED;
8728 	}
8729 
8730 	return status;
8731 }
8732 
8733 
8734 int
8735 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8736 {
8737 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8738 	if (pathBuffer.InitCheck() != B_OK)
8739 		return B_NO_MEMORY;
8740 
8741 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8742 }
8743 
8744 
8745 int
8746 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8747 	int openMode)
8748 {
8749 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8750 	if (pathBuffer.InitCheck() != B_OK)
8751 		return B_NO_MEMORY;
8752 
8753 	if ((openMode & O_CREAT) != 0) {
8754 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8755 			true);
8756 	}
8757 
8758 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8759 }
8760 
8761 
8762 status_t
8763 _kern_remove_attr(int fd, const char* name)
8764 {
8765 	return attr_remove(fd, name, true);
8766 }
8767 
8768 
8769 status_t
8770 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8771 	const char* toName)
8772 {
8773 	return attr_rename(fromFile, fromName, toFile, toName, true);
8774 }
8775 
8776 
8777 int
8778 _kern_open_index_dir(dev_t device)
8779 {
8780 	return index_dir_open(device, true);
8781 }
8782 
8783 
8784 status_t
8785 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8786 {
8787 	return index_create(device, name, type, flags, true);
8788 }
8789 
8790 
8791 status_t
8792 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8793 {
8794 	return index_name_read_stat(device, name, stat, true);
8795 }
8796 
8797 
8798 status_t
8799 _kern_remove_index(dev_t device, const char* name)
8800 {
8801 	return index_remove(device, name, true);
8802 }
8803 
8804 
8805 status_t
8806 _kern_getcwd(char* buffer, size_t size)
8807 {
8808 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8809 
8810 	// Call vfs to get current working directory
8811 	return get_cwd(buffer, size, true);
8812 }
8813 
8814 
8815 status_t
8816 _kern_setcwd(int fd, const char* path)
8817 {
8818 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8819 	if (pathBuffer.InitCheck() != B_OK)
8820 		return B_NO_MEMORY;
8821 
8822 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8823 }
8824 
8825 
8826 //	#pragma mark - userland syscalls
8827 
8828 
8829 dev_t
8830 _user_mount(const char* userPath, const char* userDevice,
8831 	const char* userFileSystem, uint32 flags, const char* userArgs,
8832 	size_t argsLength)
8833 {
8834 	char fileSystem[B_FILE_NAME_LENGTH];
8835 	KPath path, device;
8836 	char* args = NULL;
8837 	status_t status;
8838 
8839 	if (!IS_USER_ADDRESS(userPath))
8840 		return B_BAD_ADDRESS;
8841 
8842 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8843 		return B_NO_MEMORY;
8844 
8845 	status = user_copy_name(path.LockBuffer(), userPath,
8846 		B_PATH_NAME_LENGTH);
8847 	if (status != B_OK)
8848 		return status;
8849 	path.UnlockBuffer();
8850 
8851 	if (userFileSystem != NULL) {
8852 		if (!IS_USER_ADDRESS(userFileSystem))
8853 			return B_BAD_ADDRESS;
8854 
8855 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8856 		if (status != B_OK)
8857 			return status;
8858 	}
8859 
8860 	if (userDevice != NULL) {
8861 		if (!IS_USER_ADDRESS(userDevice))
8862 			return B_BAD_ADDRESS;
8863 
8864 		status = user_copy_name(device.LockBuffer(), userDevice,
8865 			B_PATH_NAME_LENGTH);
8866 		if (status != B_OK)
8867 			return status;
8868 		device.UnlockBuffer();
8869 	}
8870 
8871 	if (userArgs != NULL && argsLength > 0) {
8872 		if (!IS_USER_ADDRESS(userArgs))
8873 			return B_BAD_ADDRESS;
8874 
8875 		// this is a safety restriction
8876 		if (argsLength >= 65536)
8877 			return B_NAME_TOO_LONG;
8878 
8879 		args = (char*)malloc(argsLength + 1);
8880 		if (args == NULL)
8881 			return B_NO_MEMORY;
8882 
8883 		status = user_copy_name(args, userArgs, argsLength + 1);
8884 		if (status != B_OK) {
8885 			free(args);
8886 			return status;
8887 		}
8888 	}
8889 
8890 	status = fs_mount(path.LockBuffer(),
8891 		userDevice != NULL ? device.Path() : NULL,
8892 		userFileSystem ? fileSystem : NULL, flags, args, false);
8893 
8894 	free(args);
8895 	return status;
8896 }
8897 
8898 
8899 status_t
8900 _user_unmount(const char* userPath, uint32 flags)
8901 {
8902 	if (!IS_USER_ADDRESS(userPath))
8903 		return B_BAD_ADDRESS;
8904 
8905 	KPath pathBuffer;
8906 	if (pathBuffer.InitCheck() != B_OK)
8907 		return B_NO_MEMORY;
8908 
8909 	char* path = pathBuffer.LockBuffer();
8910 
8911 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8912 	if (status != B_OK)
8913 		return status;
8914 
8915 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8916 }
8917 
8918 
8919 status_t
8920 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8921 {
8922 	struct fs_info info;
8923 	status_t status;
8924 
8925 	if (userInfo == NULL)
8926 		return B_BAD_VALUE;
8927 
8928 	if (!IS_USER_ADDRESS(userInfo))
8929 		return B_BAD_ADDRESS;
8930 
8931 	status = fs_read_info(device, &info);
8932 	if (status != B_OK)
8933 		return status;
8934 
8935 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8936 		return B_BAD_ADDRESS;
8937 
8938 	return B_OK;
8939 }
8940 
8941 
8942 status_t
8943 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8944 {
8945 	struct fs_info info;
8946 
8947 	if (userInfo == NULL)
8948 		return B_BAD_VALUE;
8949 
8950 	if (!IS_USER_ADDRESS(userInfo)
8951 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8952 		return B_BAD_ADDRESS;
8953 
8954 	return fs_write_info(device, &info, mask);
8955 }
8956 
8957 
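/*!	\brief Returns the ID of the next mounted volume, advancing the
		   caller-supplied iteration cookie.

	A minimal iteration sketch (userland, via the \c _kern_next_device()
	stub that \c next_dev() from \c fs_info.h wraps; illustrative only):
	\code
	int32 cookie = 0;
	dev_t device;
	while ((device = _kern_next_device(&cookie)) >= 0) {
		// inspect the volume, e.g. via _kern_read_fs_info(device, ...)
	}
	\endcode
*/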
8958 dev_t
8959 _user_next_device(int32* _userCookie)
8960 {
8961 	int32 cookie;
8962 	dev_t device;
8963 
8964 	if (!IS_USER_ADDRESS(_userCookie)
8965 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8966 		return B_BAD_ADDRESS;
8967 
8968 	device = fs_next_device(&cookie);
8969 
8970 	if (device >= B_OK) {
8971 		// update user cookie
8972 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8973 			return B_BAD_ADDRESS;
8974 	}
8975 
8976 	return device;
8977 }
8978 
8979 
8980 status_t
8981 _user_sync(void)
8982 {
8983 	return _kern_sync();
8984 }
8985 
8986 
8987 status_t
8988 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8989 	size_t infoSize)
8990 {
8991 	struct fd_info info;
8992 	uint32 cookie;
8993 
8994 	// only root can do this
8995 	if (geteuid() != 0)
8996 		return B_NOT_ALLOWED;
8997 
8998 	if (infoSize != sizeof(fd_info))
8999 		return B_BAD_VALUE;
9000 
9001 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9002 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9003 		return B_BAD_ADDRESS;
9004 
9005 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9006 	if (status != B_OK)
9007 		return status;
9008 
9009 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9010 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9011 		return B_BAD_ADDRESS;
9012 
9013 	return status;
9014 }
9015 
9016 
9017 status_t
9018 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9019 	char* userPath, size_t pathLength)
9020 {
9021 	if (!IS_USER_ADDRESS(userPath))
9022 		return B_BAD_ADDRESS;
9023 
9024 	KPath path;
9025 	if (path.InitCheck() != B_OK)
9026 		return B_NO_MEMORY;
9027 
9028 	// copy the leaf name onto the stack
9029 	char stackLeaf[B_FILE_NAME_LENGTH];
9030 	if (leaf != NULL) {
9031 		if (!IS_USER_ADDRESS(leaf))
9032 			return B_BAD_ADDRESS;
9033 
9034 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9035 		if (status != B_OK)
9036 			return status;
9037 
9038 		leaf = stackLeaf;
9039 	}
9040 
9041 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9042 		false, path.LockBuffer(), path.BufferSize());
9043 	if (status != B_OK)
9044 		return status;
9045 
9046 	path.UnlockBuffer();
9047 
9048 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9049 	if (length < 0)
9050 		return length;
9051 	if (length >= (int)pathLength)
9052 		return B_BUFFER_OVERFLOW;
9053 
9054 	return B_OK;
9055 }
9056 
9057 
9058 status_t
9059 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9060 {
9061 	if (userPath == NULL || buffer == NULL)
9062 		return B_BAD_VALUE;
9063 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9064 		return B_BAD_ADDRESS;
9065 
9066 	// copy path from userland
9067 	KPath pathBuffer;
9068 	if (pathBuffer.InitCheck() != B_OK)
9069 		return B_NO_MEMORY;
9070 	char* path = pathBuffer.LockBuffer();
9071 
9072 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9073 	if (status != B_OK)
9074 		return status;
9075 
9076 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9077 		false);
9078 	if (error != B_OK)
9079 		return error;
9080 
9081 	// copy back to userland
9082 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9083 	if (len < 0)
9084 		return len;
9085 	if (len >= B_PATH_NAME_LENGTH)
9086 		return B_BUFFER_OVERFLOW;
9087 
9088 	return B_OK;
9089 }
9090 
9091 
9092 int
9093 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9094 	int openMode, int perms)
9095 {
9096 	char name[B_FILE_NAME_LENGTH];
9097 
9098 	if (userName == NULL || device < 0 || inode < 0)
9099 		return B_BAD_VALUE;
9100 	if (!IS_USER_ADDRESS(userName))
9101 		return B_BAD_ADDRESS;
9102 	status_t status = user_copy_name(name, userName, sizeof(name));
9103 	if (status != B_OK)
9104 		return status;
9105 
9106 	if ((openMode & O_CREAT) != 0) {
9107 		return file_create_entry_ref(device, inode, name, openMode, perms,
9108 			false);
9109 	}
9110 
9111 	return file_open_entry_ref(device, inode, name, openMode, false);
9112 }
9113 
9114 
9115 int
9116 _user_open(int fd, const char* userPath, int openMode, int perms)
9117 {
9118 	KPath path;
9119 	if (path.InitCheck() != B_OK)
9120 		return B_NO_MEMORY;
9121 
9122 	char* buffer = path.LockBuffer();
9123 
9124 	if (!IS_USER_ADDRESS(userPath))
9125 		return B_BAD_ADDRESS;
9126 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9127 	if (status != B_OK)
9128 		return status;
9129 
9130 	if ((openMode & O_CREAT) != 0)
9131 		return file_create(fd, buffer, openMode, perms, false);
9132 
9133 	return file_open(fd, buffer, openMode, false);
9134 }
9135 
9136 
9137 int
9138 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9139 {
9140 	if (userName != NULL) {
9141 		char name[B_FILE_NAME_LENGTH];
9142 
9143 		if (!IS_USER_ADDRESS(userName))
9144 			return B_BAD_ADDRESS;
9145 		status_t status = user_copy_name(name, userName, sizeof(name));
9146 		if (status != B_OK)
9147 			return status;
9148 
9149 		return dir_open_entry_ref(device, inode, name, false);
9150 	}
9151 	return dir_open_entry_ref(device, inode, NULL, false);
9152 }
9153 
9154 
9155 int
9156 _user_open_dir(int fd, const char* userPath)
9157 {
9158 	if (userPath == NULL)
9159 		return dir_open(fd, NULL, false);
9160 
9161 	KPath path;
9162 	if (path.InitCheck() != B_OK)
9163 		return B_NO_MEMORY;
9164 
9165 	char* buffer = path.LockBuffer();
9166 
9167 	if (!IS_USER_ADDRESS(userPath))
9168 		return B_BAD_ADDRESS;
9169 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9170 	if (status != B_OK)
9171 		return status;
9172 
9173 	return dir_open(fd, buffer, false);
9174 }
9175 
9176 
9177 /*!	\brief Opens a directory's parent directory and returns the entry name
9178 		   of the former.
9179 
9180 	Apart from returning the directory's entry name, this method is
9181 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9182 	equivalent if \a userName is \c NULL.
9183 
9184 	If a name buffer is supplied and the name does not fit the buffer, the
9185 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9186 
9187 	\param fd A FD referring to a directory.
9188 	\param userName Buffer the directory's entry name shall be written into.
9189 		   May be \c NULL.
9190 	\param nameLength Size of the name buffer.
9191 	\return The file descriptor of the opened parent directory, if everything
9192 			went fine, an error code otherwise.
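
	A minimal usage sketch (userland, via the matching
	\c _kern_open_parent_dir() syscall stub; \c dirFD stands in for an
	already opened directory):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parent = _kern_open_parent_dir(dirFD, name, sizeof(name));
	if (parent >= 0) {
		// "name" now holds the entry name of dirFD within its parent
		_kern_close(parent);
	}
	\endcode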
9193 */
9194 int
9195 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9196 {
9197 	bool kernel = false;
9198 
9199 	if (userName && !IS_USER_ADDRESS(userName))
9200 		return B_BAD_ADDRESS;
9201 
9202 	// open the parent dir
9203 	int parentFD = dir_open(fd, (char*)"..", kernel);
9204 	if (parentFD < 0)
9205 		return parentFD;
9206 	FDCloser fdCloser(parentFD, kernel);
9207 
9208 	if (userName) {
9209 		// get the vnodes
9210 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9211 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9212 		VnodePutter parentVNodePutter(parentVNode);
9213 		VnodePutter dirVNodePutter(dirVNode);
9214 		if (!parentVNode || !dirVNode)
9215 			return B_FILE_ERROR;
9216 
9217 		// get the vnode name
9218 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9219 		struct dirent* buffer = (struct dirent*)_buffer;
9220 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9221 			sizeof(_buffer), get_current_io_context(false));
9222 		if (status != B_OK)
9223 			return status;
9224 
9225 		// copy the name to the userland buffer
9226 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9227 		if (len < 0)
9228 			return len;
9229 		if (len >= (int)nameLength)
9230 			return B_BUFFER_OVERFLOW;
9231 	}
9232 
9233 	return fdCloser.Detach();
9234 }
9235 
9236 
9237 status_t
9238 _user_fcntl(int fd, int op, size_t argument)
9239 {
9240 	status_t status = common_fcntl(fd, op, argument, false);
9241 	if (op == F_SETLKW)
9242 		syscall_restart_handle_post(status);
9243 
9244 	return status;
9245 }
9246 
9247 
9248 status_t
9249 _user_fsync(int fd)
9250 {
9251 	return common_sync(fd, false);
9252 }
9253 
9254 
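/*!	\brief Applies or removes an advisory lock on the whole file referred to
		   by \a fd (flock() semantics).

	The \c LOCK_SH / \c LOCK_EX / \c LOCK_UN operation is translated into a
	\c struct \c flock spanning the entire file and handed to the file
	system, or to the generic advisory locking code as fallback.

	A minimal usage sketch (userland, via the POSIX \c flock() wrapper; the
	path is hypothetical):
	\code
	int fd = open("/boot/home/some.lock", O_RDWR);
	if (fd >= 0) {
		if (flock(fd, LOCK_EX) == 0) {
			// ... exclusive access ...
			flock(fd, LOCK_UN);
		}
		close(fd);
	}
	\endcode
*/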
9255 status_t
9256 _user_flock(int fd, int operation)
9257 {
9258 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9259 
9260 	// Check if the operation is valid
9261 	switch (operation & ~LOCK_NB) {
9262 		case LOCK_UN:
9263 		case LOCK_SH:
9264 		case LOCK_EX:
9265 			break;
9266 
9267 		default:
9268 			return B_BAD_VALUE;
9269 	}
9270 
9271 	struct vnode* vnode;
9272 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, false));
9273 	if (!descriptor.IsSet())
9274 		return B_FILE_ERROR;
9275 
9276 	if (descriptor->ops != &sFileOps)
9277 		return B_BAD_VALUE;
9278 
9279 	struct flock flock;
9280 	flock.l_start = 0;
9281 	flock.l_len = OFF_MAX;
9282 	flock.l_whence = 0;
9283 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9284 
9285 	status_t status;
9286 	if ((operation & LOCK_UN) != 0) {
9287 		if (HAS_FS_CALL(vnode, release_lock))
9288 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9289 		else
9290 			status = release_advisory_lock(vnode, NULL, descriptor.Get(), &flock);
9291 	} else {
9292 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9293 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9294 				(operation & LOCK_NB) == 0);
9295 		} else {
9296 			status = acquire_advisory_lock(vnode, NULL, descriptor.Get(), &flock,
9297 				(operation & LOCK_NB) == 0);
9298 		}
9299 	}
9300 
9301 	syscall_restart_handle_post(status);
9302 
9303 	return status;
9304 }
9305 
9306 
9307 status_t
9308 _user_lock_node(int fd)
9309 {
9310 	return common_lock_node(fd, false);
9311 }
9312 
9313 
9314 status_t
9315 _user_unlock_node(int fd)
9316 {
9317 	return common_unlock_node(fd, false);
9318 }
9319 
9320 
9321 status_t
9322 _user_preallocate(int fd, off_t offset, off_t length)
9323 {
9324 	return common_preallocate(fd, offset, length, false);
9325 }
9326 
9327 
9328 status_t
9329 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9330 	int perms)
9331 {
9332 	char name[B_FILE_NAME_LENGTH];
9333 	status_t status;
9334 
9335 	if (!IS_USER_ADDRESS(userName))
9336 		return B_BAD_ADDRESS;
9337 
9338 	status = user_copy_name(name, userName, sizeof(name));
9339 	if (status != B_OK)
9340 		return status;
9341 
9342 	return dir_create_entry_ref(device, inode, name, perms, false);
9343 }
9344 
9345 
9346 status_t
9347 _user_create_dir(int fd, const char* userPath, int perms)
9348 {
9349 	KPath pathBuffer;
9350 	if (pathBuffer.InitCheck() != B_OK)
9351 		return B_NO_MEMORY;
9352 
9353 	char* path = pathBuffer.LockBuffer();
9354 
9355 	if (!IS_USER_ADDRESS(userPath))
9356 		return B_BAD_ADDRESS;
9357 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9358 	if (status != B_OK)
9359 		return status;
9360 
9361 	return dir_create(fd, path, perms, false);
9362 }
9363 
9364 
9365 status_t
9366 _user_remove_dir(int fd, const char* userPath)
9367 {
9368 	KPath pathBuffer;
9369 	if (pathBuffer.InitCheck() != B_OK)
9370 		return B_NO_MEMORY;
9371 
9372 	char* path = pathBuffer.LockBuffer();
9373 
9374 	if (userPath != NULL) {
9375 		if (!IS_USER_ADDRESS(userPath))
9376 			return B_BAD_ADDRESS;
9377 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9378 		if (status != B_OK)
9379 			return status;
9380 	}
9381 
9382 	return dir_remove(fd, userPath ? path : NULL, false);
9383 }
9384 
9385 
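/*!	\brief Reads the contents of the symlink specified by \a fd + \a userPath.

	\a userBufferSize is updated with the actual length of the link contents
	even on failure; in particular, on \c B_BUFFER_OVERFLOW it reports the
	real length, so the caller can retry with a larger buffer.

	A minimal usage sketch (userland, via the \c _kern_read_link() stub; the
	path is hypothetical):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t error = _kern_read_link(-1, "/boot/home/link", buffer, &size);
	if (error == B_BUFFER_OVERFLOW) {
		// "size" now holds the required buffer length
	}
	\endcode
*/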
9386 status_t
9387 _user_read_link(int fd, const char* userPath, char* userBuffer,
9388 	size_t* userBufferSize)
9389 {
9390 	KPath pathBuffer, linkBuffer;
9391 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9392 		return B_NO_MEMORY;
9393 
9394 	size_t bufferSize;
9395 
9396 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9397 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9398 		return B_BAD_ADDRESS;
9399 
9400 	char* path = pathBuffer.LockBuffer();
9401 	char* buffer = linkBuffer.LockBuffer();
9402 
9403 	if (userPath) {
9404 		if (!IS_USER_ADDRESS(userPath))
9405 			return B_BAD_ADDRESS;
9406 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9407 		if (status != B_OK)
9408 			return status;
9409 
9410 		if (bufferSize > B_PATH_NAME_LENGTH)
9411 			bufferSize = B_PATH_NAME_LENGTH;
9412 	}
9413 
9414 	size_t newBufferSize = bufferSize;
9415 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9416 		&newBufferSize, false);
9417 
9418 	// we also update the bufferSize in case of errors
9419 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9420 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9421 		return B_BAD_ADDRESS;
9422 
9423 	if (status != B_OK)
9424 		return status;
9425 
9426 	bufferSize = min_c(newBufferSize, bufferSize);
9427 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9428 		return B_BAD_ADDRESS;
9429 
9430 	return B_OK;
9431 }
9432 
9433 
9434 status_t
9435 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9436 	int mode)
9437 {
9438 	KPath pathBuffer;
9439 	KPath toPathBuffer;
9440 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9441 		return B_NO_MEMORY;
9442 
9443 	char* path = pathBuffer.LockBuffer();
9444 	char* toPath = toPathBuffer.LockBuffer();
9445 
9446 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9447 		return B_BAD_ADDRESS;
9448 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9449 	if (status != B_OK)
9450 		return status;
9451 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9452 	if (status != B_OK)
9453 		return status;
9454 
9455 	return common_create_symlink(fd, path, toPath, mode, false);
9456 }
9457 
9458 
9459 status_t
9460 _user_create_link(int pathFD, const char* userPath, int toFD,
9461 	const char* userToPath, bool traverseLeafLink)
9462 {
9463 	KPath pathBuffer;
9464 	KPath toPathBuffer;
9465 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9466 		return B_NO_MEMORY;
9467 
9468 	char* path = pathBuffer.LockBuffer();
9469 	char* toPath = toPathBuffer.LockBuffer();
9470 
9471 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9472 		return B_BAD_ADDRESS;
9473 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9474 	if (status != B_OK)
9475 		return status;
9476 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9477 	if (status != B_OK)
9478 		return status;
9479 
9480 	status = check_path(toPath);
9481 	if (status != B_OK)
9482 		return status;
9483 
9484 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9485 		false);
9486 }
9487 
9488 
9489 status_t
9490 _user_unlink(int fd, const char* userPath)
9491 {
9492 	KPath pathBuffer;
9493 	if (pathBuffer.InitCheck() != B_OK)
9494 		return B_NO_MEMORY;
9495 
9496 	char* path = pathBuffer.LockBuffer();
9497 
9498 	if (!IS_USER_ADDRESS(userPath))
9499 		return B_BAD_ADDRESS;
9500 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9501 	if (status != B_OK)
9502 		return status;
9503 
9504 	return common_unlink(fd, path, false);
9505 }
9506 
9507 
9508 status_t
9509 _user_rename(int oldFD, const char* userOldPath, int newFD,
9510 	const char* userNewPath)
9511 {
9512 	KPath oldPathBuffer;
9513 	KPath newPathBuffer;
9514 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9515 		return B_NO_MEMORY;
9516 
9517 	char* oldPath = oldPathBuffer.LockBuffer();
9518 	char* newPath = newPathBuffer.LockBuffer();
9519 
9520 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9521 		return B_BAD_ADDRESS;
9522 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9523 	if (status != B_OK)
9524 		return status;
9525 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9526 	if (status != B_OK)
9527 		return status;
9528 
9529 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9530 }
9531 
9532 
9533 status_t
9534 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9535 {
9536 	KPath pathBuffer;
9537 	if (pathBuffer.InitCheck() != B_OK)
9538 		return B_NO_MEMORY;
9539 
9540 	char* path = pathBuffer.LockBuffer();
9541 
9542 	if (!IS_USER_ADDRESS(userPath))
9543 		return B_BAD_ADDRESS;
9544 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9545 	if (status != B_OK)
9546 		return status;
9547 
9548 	// split into directory vnode and filename path
9549 	char filename[B_FILE_NAME_LENGTH];
9550 	VnodePutter dir;
9551 	status = fd_and_path_to_dir_vnode(fd, path, dir, filename, false);
9552 	if (status != B_OK)
9553 		return status;
9554 
9555 	// the underlying FS needs to support creating FIFOs
9556 	if (!HAS_FS_CALL(dir, create_special_node))
9557 		return B_UNSUPPORTED;
9558 
9559 	// create the entry	-- the FIFO sub node is set up automatically
9560 	fs_vnode superVnode;
9561 	ino_t nodeID;
9562 	status = FS_CALL(dir.Get(), create_special_node, filename, NULL,
9563 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9564 
9565 	// create_special_node() acquired a reference for us that we don't need.
9566 	if (status == B_OK)
9567 		put_vnode(dir->mount->volume, nodeID);
9568 
9569 	return status;
9570 }
9571 
9572 
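/*!	\brief Creates an anonymous FIFO and returns a read and a write FD for it.

	The pipe is backed by a special FIFO node on the root file system; the
	two returned FDs reference that same node, opened \c O_RDONLY and
	\c O_WRONLY respectively.

	A minimal usage sketch (userland, via the \c _kern_create_pipe() stub,
	on which the POSIX \c pipe() wrapper is built; illustrative only):
	\code
	int fds[2];
	if (_kern_create_pipe(fds, 0) == B_OK) {
		write(fds[1], "ping", 4);
		// a subsequent read(fds[0], ...) sees the data
	}
	\endcode
*/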
9573 status_t
9574 _user_create_pipe(int* userFDs, int flags)
9575 {
9576 	// check acceptable flags
9577 	if ((flags & ~(O_NONBLOCK | O_CLOEXEC)) != 0)
9578 		return B_BAD_VALUE;
9579 
9580 	// rootfs should support creating FIFOs, but let's be sure
9581 	if (!HAS_FS_CALL(sRoot, create_special_node))
9582 		return B_UNSUPPORTED;
9583 
9584 	// create the node	-- the FIFO sub node is set up automatically
9585 	fs_vnode superVnode;
9586 	ino_t nodeID;
9587 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9588 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9589 	if (status != B_OK)
9590 		return status;
9591 
9592 	// We've got one reference to the node and need another one.
9593 	struct vnode* vnode;
9594 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9595 	if (status != B_OK) {
9596 		// that should not happen
9597 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9598 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9599 		return status;
9600 	}
9601 
9602 	// Everything looks good so far. Open one FD for reading and one for
9603 	// writing; the read end gets O_NONBLOCK so open(O_RDONLY) cannot block.
9604 	int fds[2];
9605 	fds[0] = open_vnode(vnode, O_RDONLY | O_NONBLOCK | flags, false);
9606 	fds[1] = open_vnode(vnode, O_WRONLY | flags, false);
9607 	// Clear O_NONBLOCK again on the read end, if the caller did not request it
9608 	if ((flags & O_NONBLOCK) == 0)
9609 		common_fcntl(fds[0], F_SETFL, flags & O_NONBLOCK, false);
9610 
9611 	FDCloser closer0(fds[0], false);
9612 	FDCloser closer1(fds[1], false);
9613 
9614 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9615 
9616 	// copy FDs to userland
9617 	if (status == B_OK) {
9618 		if (!IS_USER_ADDRESS(userFDs)
9619 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9620 			status = B_BAD_ADDRESS;
9621 		}
9622 	}
9623 
9624 	// keep FDs, if everything went fine
9625 	if (status == B_OK) {
9626 		closer0.Detach();
9627 		closer1.Detach();
9628 	}
9629 
9630 	return status;
9631 }
9632 
9633 
9634 status_t
9635 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9636 {
9637 	KPath pathBuffer;
9638 	if (pathBuffer.InitCheck() != B_OK)
9639 		return B_NO_MEMORY;
9640 
9641 	char* path = pathBuffer.LockBuffer();
9642 
9643 	if (!IS_USER_ADDRESS(userPath))
9644 		return B_BAD_ADDRESS;
9645 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9646 	if (status != B_OK)
9647 		return status;
9648 
9649 	return common_access(fd, path, mode, effectiveUserGroup, false);
9650 }
9651 
9652 
9653 status_t
9654 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9655 	struct stat* userStat, size_t statSize)
9656 {
9657 	struct stat stat = {0};
9658 	status_t status;
9659 
9660 	if (statSize > sizeof(struct stat))
9661 		return B_BAD_VALUE;
9662 
9663 	if (!IS_USER_ADDRESS(userStat))
9664 		return B_BAD_ADDRESS;
9665 
9666 	if (userPath != NULL) {
9667 		// path given: get the stat of the node referred to by (fd, path)
9668 		if (!IS_USER_ADDRESS(userPath))
9669 			return B_BAD_ADDRESS;
9670 
9671 		KPath pathBuffer;
9672 		if (pathBuffer.InitCheck() != B_OK)
9673 			return B_NO_MEMORY;
9674 
9675 		char* path = pathBuffer.LockBuffer();
9676 
9677 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9678 		if (status != B_OK)
9679 			return status;
9680 
9681 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9682 	} else {
9683 		// no path given: get the FD and use the FD operation
9684 		FileDescriptorPutter descriptor
9685 			(get_fd(get_current_io_context(false), fd));
9686 		if (!descriptor.IsSet())
9687 			return B_FILE_ERROR;
9688 
9689 		if (descriptor->ops->fd_read_stat)
9690 			status = descriptor->ops->fd_read_stat(descriptor.Get(), &stat);
9691 		else
9692 			status = B_UNSUPPORTED;
9693 	}
9694 
9695 	if (status != B_OK)
9696 		return status;
9697 
9698 	return user_memcpy(userStat, &stat, statSize);
9699 }
9700 
9701 
9702 status_t
9703 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9704 	const struct stat* userStat, size_t statSize, int statMask)
9705 {
9706 	if (statSize > sizeof(struct stat))
9707 		return B_BAD_VALUE;
9708 
9709 	struct stat stat;
9710 
9711 	if (!IS_USER_ADDRESS(userStat)
9712 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9713 		return B_BAD_ADDRESS;
9714 
9715 	// clear additional stat fields
9716 	if (statSize < sizeof(struct stat))
9717 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9718 
9719 	status_t status;
9720 
9721 	if (userPath != NULL) {
9722 		// path given: write the stat of the node referred to by (fd, path)
9723 		if (!IS_USER_ADDRESS(userPath))
9724 			return B_BAD_ADDRESS;
9725 
9726 		KPath pathBuffer;
9727 		if (pathBuffer.InitCheck() != B_OK)
9728 			return B_NO_MEMORY;
9729 
9730 		char* path = pathBuffer.LockBuffer();
9731 
9732 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9733 		if (status != B_OK)
9734 			return status;
9735 
9736 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9737 			statMask, false);
9738 	} else {
9739 		// no path given: get the FD and use the FD operation
9740 		FileDescriptorPutter descriptor
9741 			(get_fd(get_current_io_context(false), fd));
9742 		if (!descriptor.IsSet())
9743 			return B_FILE_ERROR;
9744 
9745 		if (descriptor->ops->fd_write_stat) {
9746 			status = descriptor->ops->fd_write_stat(descriptor.Get(), &stat,
9747 				statMask);
9748 		} else
9749 			status = B_UNSUPPORTED;
9750 	}
9751 
9752 	return status;
9753 }
9754 
9755 
9756 int
9757 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9758 {
9759 	KPath pathBuffer;
9760 	if (pathBuffer.InitCheck() != B_OK)
9761 		return B_NO_MEMORY;
9762 
9763 	char* path = pathBuffer.LockBuffer();
9764 
9765 	if (userPath != NULL) {
9766 		if (!IS_USER_ADDRESS(userPath))
9767 			return B_BAD_ADDRESS;
9768 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9769 		if (status != B_OK)
9770 			return status;
9771 	}
9772 
9773 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9774 }
9775 
9776 
9777 ssize_t
9778 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9779 	size_t readBytes)
9780 {
9781 	char attribute[B_FILE_NAME_LENGTH];
9782 
9783 	if (userAttribute == NULL)
9784 		return B_BAD_VALUE;
9785 	if (!IS_USER_ADDRESS(userAttribute))
9786 		return B_BAD_ADDRESS;
9787 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9788 	if (status != B_OK)
9789 		return status;
9790 
9791 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9792 	if (attr < 0)
9793 		return attr;
9794 
9795 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9796 	_user_close(attr);
9797 
9798 	return bytes;
9799 }
9800 
9801 
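/*!	\brief Writes an extended attribute of the node referred to by \a fd.

	The attribute is created with the given \a type if necessary; a write at
	position 0 truncates it first, mirroring the classic BeOS semantics.

	A minimal usage sketch (userland, via the \c fs_write_attr() wrapper
	from \c fs_attr.h; the attribute name is only an example):
	\code
	const char* subject = "Status report";
	fs_write_attr(fd, "MAIL:subject", B_STRING_TYPE, 0, subject,
		strlen(subject) + 1);
	\endcode
*/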
9802 ssize_t
9803 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9804 	const void* buffer, size_t writeBytes)
9805 {
9806 	char attribute[B_FILE_NAME_LENGTH];
9807 
9808 	if (userAttribute == NULL)
9809 		return B_BAD_VALUE;
9810 	if (!IS_USER_ADDRESS(userAttribute))
9811 		return B_BAD_ADDRESS;
9812 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9813 	if (status != B_OK)
9814 		return status;
9815 
9816 	// Try to support the BeOS-typical truncation as well as the position
9817 	// argument
9818 	int attr = attr_create(fd, NULL, attribute, type,
9819 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9820 	if (attr < 0)
9821 		return attr;
9822 
9823 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9824 	_user_close(attr);
9825 
9826 	return bytes;
9827 }
9828 
9829 
9830 status_t
9831 _user_stat_attr(int fd, const char* userAttribute,
9832 	struct attr_info* userAttrInfo)
9833 {
9834 	char attribute[B_FILE_NAME_LENGTH];
9835 
9836 	if (userAttribute == NULL || userAttrInfo == NULL)
9837 		return B_BAD_VALUE;
9838 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9839 		return B_BAD_ADDRESS;
9840 	status_t status = user_copy_name(attribute, userAttribute,
9841 		sizeof(attribute));
9842 	if (status != B_OK)
9843 		return status;
9844 
9845 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9846 	if (attr < 0)
9847 		return attr;
9848 
9849 	struct file_descriptor* descriptor
9850 		= get_fd(get_current_io_context(false), attr);
9851 	if (descriptor == NULL) {
9852 		_user_close(attr);
9853 		return B_FILE_ERROR;
9854 	}
9855 
9856 	struct stat stat;
9857 	if (descriptor->ops->fd_read_stat)
9858 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9859 	else
9860 		status = B_UNSUPPORTED;
9861 
9862 	put_fd(descriptor);
9863 	_user_close(attr);
9864 
9865 	if (status == B_OK) {
9866 		attr_info info;
9867 		info.type = stat.st_type;
9868 		info.size = stat.st_size;
9869 
9870 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9871 			return B_BAD_ADDRESS;
9872 	}
9873 
9874 	return status;
9875 }
9876 
9877 
9878 int
9879 _user_open_attr(int fd, const char* userPath, const char* userName,
9880 	uint32 type, int openMode)
9881 {
9882 	char name[B_FILE_NAME_LENGTH];
9883 
9884 	if (!IS_USER_ADDRESS(userName))
9885 		return B_BAD_ADDRESS;
9886 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9887 	if (status != B_OK)
9888 		return status;
9889 
9890 	KPath pathBuffer;
9891 	if (pathBuffer.InitCheck() != B_OK)
9892 		return B_NO_MEMORY;
9893 
9894 	char* path = pathBuffer.LockBuffer();
9895 
9896 	if (userPath != NULL) {
9897 		if (!IS_USER_ADDRESS(userPath))
9898 			return B_BAD_ADDRESS;
9899 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9900 		if (status != B_OK)
9901 			return status;
9902 	}
9903 
9904 	if ((openMode & O_CREAT) != 0) {
9905 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9906 			false);
9907 	}
9908 
9909 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9910 }
9911 
9912 
9913 status_t
9914 _user_remove_attr(int fd, const char* userName)
9915 {
9916 	char name[B_FILE_NAME_LENGTH];
9917 
9918 	if (!IS_USER_ADDRESS(userName))
9919 		return B_BAD_ADDRESS;
9920 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9921 	if (status != B_OK)
9922 		return status;
9923 
9924 	return attr_remove(fd, name, false);
9925 }
9926 
9927 
9928 status_t
9929 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9930 	const char* userToName)
9931 {
9932 	if (!IS_USER_ADDRESS(userFromName)
9933 		|| !IS_USER_ADDRESS(userToName))
9934 		return B_BAD_ADDRESS;
9935 
9936 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9937 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9938 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9939 		return B_NO_MEMORY;
9940 
9941 	char* fromName = fromNameBuffer.LockBuffer();
9942 	char* toName = toNameBuffer.LockBuffer();
9943 
9944 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9945 	if (status != B_OK)
9946 		return status;
9947 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9948 	if (status != B_OK)
9949 		return status;
9950 
9951 	return attr_rename(fromFile, fromName, toFile, toName, false);
9952 }
9953 
9954 
9955 int
9956 _user_open_index_dir(dev_t device)
9957 {
9958 	return index_dir_open(device, false);
9959 }
9960 
9961 
9962 status_t
9963 _user_create_index(dev_t device, const char* userName, uint32 type,
9964 	uint32 flags)
9965 {
9966 	char name[B_FILE_NAME_LENGTH];
9967 
9968 	if (!IS_USER_ADDRESS(userName))
9969 		return B_BAD_ADDRESS;
9970 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9971 	if (status != B_OK)
9972 		return status;
9973 
9974 	return index_create(device, name, type, flags, false);
9975 }
9976 
9977 
9978 status_t
9979 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9980 {
9981 	char name[B_FILE_NAME_LENGTH];
9982 	struct stat stat = {0};
9983 	status_t status;
9984 
9985 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9986 		return B_BAD_ADDRESS;
9987 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9988 	if (status != B_OK)
9989 		return status;
9990 
9991 	status = index_name_read_stat(device, name, &stat, false);
9992 	if (status == B_OK) {
9993 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9994 			return B_BAD_ADDRESS;
9995 	}
9996 
9997 	return status;
9998 }
9999 
10000 
10001 status_t
10002 _user_remove_index(dev_t device, const char* userName)
10003 {
10004 	char name[B_FILE_NAME_LENGTH];
10005 
10006 	if (!IS_USER_ADDRESS(userName))
10007 		return B_BAD_ADDRESS;
10008 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10009 	if (status != B_OK)
10010 		return status;
10011 
10012 	return index_remove(device, name, false);
10013 }
10014 
10015 
10016 status_t
10017 _user_getcwd(char* userBuffer, size_t size)
10018 {
10019 	if (size == 0)
10020 		return B_BAD_VALUE;
10021 	if (!IS_USER_ADDRESS(userBuffer))
10022 		return B_BAD_ADDRESS;
10023 
10024 	if (size > kMaxPathLength)
10025 		size = kMaxPathLength;
10026 
10027 	KPath pathBuffer(size);
10028 	if (pathBuffer.InitCheck() != B_OK)
10029 		return B_NO_MEMORY;
10030 
10031 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
10032 
10033 	char* path = pathBuffer.LockBuffer();
10034 
10035 	status_t status = get_cwd(path, size, false);
10036 	if (status != B_OK)
10037 		return status;
10038 
10039 	// Copy back the result
10040 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10041 		return B_BAD_ADDRESS;
10042 
10043 	return status;
10044 }
10045 
10046 
10047 status_t
10048 _user_setcwd(int fd, const char* userPath)
10049 {
10050 	TRACE(("user_setcwd: path = %p\n", userPath));
10051 
10052 	KPath pathBuffer;
10053 	if (pathBuffer.InitCheck() != B_OK)
10054 		return B_NO_MEMORY;
10055 
10056 	char* path = pathBuffer.LockBuffer();
10057 
10058 	if (userPath != NULL) {
10059 		if (!IS_USER_ADDRESS(userPath))
10060 			return B_BAD_ADDRESS;
10061 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10062 		if (status != B_OK)
10063 			return status;
10064 	}
10065 
10066 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10067 }
10068 
10069 
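/*!	\brief Changes the calling team's file system root (chroot() semantics).

	Only the root user may do this. The vnode resolved from \a userPath
	replaces the I/O context's root, so all subsequent absolute path lookups
	of this team start there.

	A minimal usage sketch (userland, via the POSIX \c chroot() wrapper; the
	path is hypothetical):
	\code
	if (chroot("/boot/home/jail") == 0)
		chdir("/");
	\endcode
*/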
10070 status_t
10071 _user_change_root(const char* userPath)
10072 {
10073 	// only root is allowed to chroot()
10074 	if (geteuid() != 0)
10075 		return B_NOT_ALLOWED;
10076 
10077 	// alloc path buffer
10078 	KPath pathBuffer;
10079 	if (pathBuffer.InitCheck() != B_OK)
10080 		return B_NO_MEMORY;
10081 
10082 	// copy userland path to kernel
10083 	char* path = pathBuffer.LockBuffer();
10084 	if (userPath != NULL) {
10085 		if (!IS_USER_ADDRESS(userPath))
10086 			return B_BAD_ADDRESS;
10087 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10088 		if (status != B_OK)
10089 			return status;
10090 	}
10091 
10092 	// get the vnode
10093 	VnodePutter vnode;
10094 	status_t status = path_to_vnode(path, true, vnode, NULL, false);
10095 	if (status != B_OK)
10096 		return status;
10097 
10098 	// set the new root
10099 	struct io_context* context = get_current_io_context(false);
10100 	mutex_lock(&sIOContextRootLock);
10101 	struct vnode* oldRoot = context->root;
10102 	context->root = vnode.Detach();
10103 	mutex_unlock(&sIOContextRootLock);
10104 
10105 	put_vnode(oldRoot);
10106 
10107 	return B_OK;
10108 }
10109 
10110 
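/*!	\brief Opens a query on the volume \a device.

	The query string is interpreted by the file system's query language
	(for BFS, attribute predicates such as \c name=="*.cpp"); for a live
	query, updates are delivered to \a port / \a token.

	A minimal usage sketch (userland, via the \c fs_open_query() wrapper
	from \c fs_query.h; illustrative only):
	\code
	DIR* query = fs_open_query(device, "name==\"*.cpp\"", 0);
	if (query != NULL) {
		while (struct dirent* entry = fs_read_query(query)) {
			// entry->d_name refers to a matching entry
		}
		fs_close_query(query);
	}
	\endcode
*/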
10111 int
10112 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10113 	uint32 flags, port_id port, int32 token)
10114 {
10115 	if (device < 0 || userQuery == NULL || queryLength == 0)
10116 		return B_BAD_VALUE;
10117 
10118 	if (!IS_USER_ADDRESS(userQuery))
10119 		return B_BAD_ADDRESS;
10120 
10121 	// this is a safety restriction
10122 	if (queryLength >= 65536)
10123 		return B_NAME_TOO_LONG;
10124 
10125 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10126 	if (!query.IsValid())
10127 		return B_NO_MEMORY;
10128 
10129 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10130 		return B_BAD_ADDRESS;
10131 
10132 	return query_open(device, query, flags, port, token, false);
10133 }
10134 
10135 
10136 #include "vfs_request_io.cpp"
10137