xref: /haiku/src/system/kernel/fs/fd.cpp (revision 4a55cc230cf7566cadcbb23b1928eefff8aea9a2)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 #include <AutoDeleterDrivers.h>
20 #include <BytePointer.h>
21 
22 #include <syscalls.h>
23 #include <syscall_restart.h>
24 #include <slab/Slab.h>
25 #include <util/AutoLock.h>
26 #include <util/iovec_support.h>
27 #include <vfs.h>
28 #include <wait_for_objects.h>
29 
30 #include "vfs_tracing.h"
31 
32 
33 //#define TRACE_FD
34 #ifdef TRACE_FD
35 #	define TRACE(x) dprintf x
36 #else
37 #	define TRACE(x)
38 #endif
39 
40 
41 static const size_t kMaxReadDirBufferSize = 64 * 1024;
42 
43 extern object_cache* sFileDescriptorCache;
44 
45 
46 static struct file_descriptor* get_fd_locked(struct io_context* context,
47 	int fd);
48 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
49 static void deselect_select_infos(file_descriptor* descriptor,
50 	select_info* infos, bool putSyncObjects);
51 
52 
53 struct FDGetterLocking {
54 	inline bool Lock(file_descriptor* /*lockable*/)
55 	{
56 		return false;
57 	}
58 
59 	inline void Unlock(file_descriptor* lockable)
60 	{
61 		put_fd(lockable);
62 	}
63 };
64 
65 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
66 public:
67 	inline FDGetter()
68 		: AutoLocker<file_descriptor, FDGetterLocking>()
69 	{
70 	}
71 
72 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
73 		: AutoLocker<file_descriptor, FDGetterLocking>(
74 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
75 	{
76 	}
77 
78 	inline file_descriptor* SetTo(io_context* context, int fd,
79 		bool contextLocked = false)
80 	{
81 		file_descriptor* descriptor
82 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
83 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
84 		return descriptor;
85 	}
86 
87 	inline file_descriptor* SetTo(int fd, bool kernel,
88 		bool contextLocked = false)
89 	{
90 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
91 	}
92 
93 	inline file_descriptor* FD() const
94 	{
95 		return fLockable;
96 	}
97 };
98 
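// A typical usage pattern for FDGetter, as used by the functions below:
// construct it, call SetTo() to fetch and reference the descriptor, and let
// it go out of scope to put the reference again.
//
//	FDGetter fdGetter;
//	struct file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
//	if (descriptor == NULL)
//		return B_FILE_ERROR;
//	// ... use descriptor; the reference is put when fdGetter is destroyed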
99 
100 //	#pragma mark - General fd routines
101 
102 
103 #ifdef DEBUG
104 void dump_fd(int fd, struct file_descriptor* descriptor);
105 
106 void
107 dump_fd(int fd, struct file_descriptor* descriptor)
108 {
109 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
110 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
111 		", pos = %" B_PRId64 "\n",
112 		fd, descriptor, descriptor->type, descriptor->ref_count,
113 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
114 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
115 }
116 #endif
117 
118 
119 /*! Allocates and initializes a new file_descriptor.
120 */
121 struct file_descriptor*
122 alloc_fd(void)
123 {
124 	file_descriptor* descriptor
125 		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
126 	if (descriptor == NULL)
127 		return NULL;
128 
129 	descriptor->u.vnode = NULL;
130 	descriptor->cookie = NULL;
131 	descriptor->ref_count = 1;
132 	descriptor->open_count = 0;
133 	descriptor->open_mode = 0;
134 	descriptor->pos = 0;
135 
136 	return descriptor;
137 }
138 
139 
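/*!	Returns whether the close-on-exec flag is set for the given FD in the
	context's \c fds_close_on_exec bitmap.
*/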
140 bool
141 fd_close_on_exec(struct io_context* context, int fd)
142 {
143 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7);
144 }
145 
146 
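/*!	Sets or clears the close-on-exec flag for the given FD in the context's
	\c fds_close_on_exec bitmap.
*/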
147 void
148 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
149 {
150 	if (closeFD)
151 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
152 	else
153 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
154 }
155 
156 
157 /*!	Searches for a free slot in the FD table of the given I/O context
158 	(starting at \a firstIndex), and inserts the specified descriptor into it.
159 */
160 int
161 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
162 	int firstIndex)
163 {
164 	int fd = -1;
165 	uint32 i;
166 
167 	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
168 		return B_BAD_VALUE;
169 
170 	mutex_lock(&context->io_mutex);
171 
172 	for (i = firstIndex; i < context->table_size; i++) {
173 		if (!context->fds[i]) {
174 			fd = i;
175 			break;
176 		}
177 	}
178 	if (fd < 0) {
179 		fd = B_NO_MORE_FDS;
180 		goto err;
181 	}
182 
183 	TFD(NewFD(context, fd, descriptor));
184 
185 	context->fds[fd] = descriptor;
186 	context->num_used_fds++;
187 	atomic_add(&descriptor->open_count, 1);
188 
189 err:
190 	mutex_unlock(&context->io_mutex);
191 
192 	return fd;
193 }
194 
195 
196 int
197 new_fd(struct io_context* context, struct file_descriptor* descriptor)
198 {
199 	return new_fd_etc(context, descriptor, 0);
200 }
201 
202 
203 /*!	Decrements the descriptor's reference count, and frees all of its
204 	resources once the count drops to zero.
205 */
206 void
207 put_fd(struct file_descriptor* descriptor)
208 {
209 	int32 previous = atomic_add(&descriptor->ref_count, -1);
210 
211 	TFD(PutFD(descriptor));
212 
213 	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
214 		descriptor, descriptor->ref_count, descriptor->cookie));
215 
216 	// free the descriptor if we don't need it anymore
217 	if (previous == 1) {
218 		// free the underlying object
219 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
220 			descriptor->ops->fd_free(descriptor);
221 
222 		object_cache_free(sFileDescriptorCache, descriptor, 0);
223 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
224 		&& previous - 1 == descriptor->open_count
225 		&& descriptor->ops != NULL) {
226 		// The descriptor has been disconnected and cannot be accessed
227 		// anymore; close it now, since no one else is currently
228 		// using it.
229 
230 		if (descriptor->ops->fd_close)
231 			descriptor->ops->fd_close(descriptor);
232 		if (descriptor->ops->fd_free)
233 			descriptor->ops->fd_free(descriptor);
234 
235 		// prevent this descriptor from being closed/freed again
236 		descriptor->ops = NULL;
237 		descriptor->u.vnode = NULL;
238 
239 		// the file descriptor is kept intact, so that it's not
240 		// reused until someone explicitly closes it
241 	}
242 }
243 
244 
245 /*!	Decrements the open counter of the file descriptor and invokes
246 	its close hook when appropriate.
247 */
248 void
249 close_fd(struct io_context* context, struct file_descriptor* descriptor)
250 {
251 	// POSIX advisory locks need to be released when any file descriptor closes
252 	if (descriptor->type == FDTYPE_FILE)
253 		vfs_release_posix_lock(context, descriptor);
254 
255 	if (atomic_add(&descriptor->open_count, -1) == 1) {
256 		vfs_unlock_vnode_if_locked(descriptor);
257 
258 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
259 			descriptor->ops->fd_close(descriptor);
260 	}
261 }
262 
263 
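/*!	Removes the descriptor at the given FD table slot, closes it, and puts
	the reference that was held by the slot. Returns \c B_FILE_ERROR if the
	FD was not valid.
*/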
264 status_t
265 close_fd_index(struct io_context* context, int fd)
266 {
267 	struct file_descriptor* descriptor = remove_fd(context, fd);
268 
269 	if (descriptor == NULL)
270 		return B_FILE_ERROR;
271 
272 	close_fd(context, descriptor);
273 	put_fd(descriptor);
274 		// the reference associated with the slot
275 
276 	return B_OK;
277 }
278 
279 
280 /*!	Marks the descriptor as disconnected: its underlying object will be
281 	closed and freed as soon as possible (in one of the next calls to
282 	put_fd()), and get_fd() will no longer succeed on it.
283 	This is useful if the underlying object is gone, for instance when a
284 	(mounted) volume is removed unexpectedly.
285 */
286 void
287 disconnect_fd(struct file_descriptor* descriptor)
288 {
289 	descriptor->open_mode |= O_DISCONNECTED;
290 }
291 
292 
293 void
294 inc_fd_ref_count(struct file_descriptor* descriptor)
295 {
296 	atomic_add(&descriptor->ref_count, 1);
297 }
298 
299 
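/*!	Returns the descriptor for the given FD with an additional reference
	acquired, or \c NULL if the FD is out of range, unused, or disconnected.
	The caller must hold the context's \c io_mutex.
*/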
300 static struct file_descriptor*
301 get_fd_locked(struct io_context* context, int fd)
302 {
303 	if (fd < 0 || (uint32)fd >= context->table_size)
304 		return NULL;
305 
306 	struct file_descriptor* descriptor = context->fds[fd];
307 
308 	if (descriptor != NULL) {
309 		// disconnected descriptors cannot be accessed anymore
310 		if (descriptor->open_mode & O_DISCONNECTED)
311 			return NULL;
312 
313 		TFD(GetFD(context, fd, descriptor));
314 		inc_fd_ref_count(descriptor);
315 	}
316 
317 	return descriptor;
318 }
319 
320 
321 struct file_descriptor*
322 get_fd(struct io_context* context, int fd)
323 {
324 	MutexLocker _(context->io_mutex);
325 
326 	return get_fd_locked(context, fd);
327 }
328 
329 
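/*!	Like get_fd(), but also increments the descriptor's open count, so that
	the caller keeps the descriptor open in addition to referencing it.
*/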
330 struct file_descriptor*
331 get_open_fd(struct io_context* context, int fd)
332 {
333 	MutexLocker _(context->io_mutex);
334 
335 	file_descriptor* descriptor = get_fd_locked(context, fd);
336 	if (descriptor == NULL)
337 		return NULL;
338 
339 	atomic_add(&descriptor->open_count, 1);
340 
341 	return descriptor;
342 }
343 
344 
345 /*!	Removes the file descriptor from the specified slot.
346 */
347 static struct file_descriptor*
348 remove_fd(struct io_context* context, int fd)
349 {
350 	struct file_descriptor* descriptor = NULL;
351 
352 	if (fd < 0)
353 		return NULL;
354 
355 	mutex_lock(&context->io_mutex);
356 
357 	if ((uint32)fd < context->table_size)
358 		descriptor = context->fds[fd];
359 
360 	select_info* selectInfos = NULL;
361 	bool disconnected = false;
362 
363 	if (descriptor != NULL)	{
364 		// fd is valid
365 		TFD(RemoveFD(context, fd, descriptor));
366 
367 		context->fds[fd] = NULL;
368 		fd_set_close_on_exec(context, fd, false);
369 		context->num_used_fds--;
370 
371 		selectInfos = context->select_infos[fd];
372 		context->select_infos[fd] = NULL;
373 
374 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
375 	}
376 
377 	mutex_unlock(&context->io_mutex);
378 
379 	if (selectInfos != NULL)
380 		deselect_select_infos(descriptor, selectInfos, true);
381 
382 	return disconnected ? NULL : descriptor;
383 }
384 
385 
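/*!	Duplicates the given FD into the first free slot of the current I/O
	context. The new FD has the close-on-exec flag cleared. Returns the new
	FD or an error code.
*/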
386 static int
387 dup_fd(int fd, bool kernel)
388 {
389 	struct io_context* context = get_current_io_context(kernel);
390 	struct file_descriptor* descriptor;
391 	int status;
392 
393 	TRACE(("dup_fd: fd = %d\n", fd));
394 
395 	// Try to get the fd structure
396 	descriptor = get_fd(context, fd);
397 	if (descriptor == NULL)
398 		return B_FILE_ERROR;
399 
400 	// now put the fd in place
401 	status = new_fd(context, descriptor);
402 	if (status < 0)
403 		put_fd(descriptor);
404 	else {
405 		mutex_lock(&context->io_mutex);
406 		fd_set_close_on_exec(context, status, false);
407 		mutex_unlock(&context->io_mutex);
408 	}
409 
410 	return status;
411 }
412 
413 
414 /*!	POSIX says this should be the same as:
415 		close(newfd);
416 		fcntl(oldfd, F_DUPFD, newfd);
417 
418 	We implement dup2() directly rather than using those calls, so that the
419 	operation is thread-safe.
419 */
420 static int
421 dup2_fd(int oldfd, int newfd, bool kernel)
422 {
423 	struct file_descriptor* evicted = NULL;
424 	struct io_context* context;
425 
426 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
427 
428 	// quick check
429 	if (oldfd < 0 || newfd < 0)
430 		return B_FILE_ERROR;
431 
432 	// Get current I/O context and lock it
433 	context = get_current_io_context(kernel);
434 	mutex_lock(&context->io_mutex);
435 
436 	// Check if the fds are valid (mutex must be locked because
437 	// the table size could be changed)
438 	if ((uint32)oldfd >= context->table_size
439 		|| (uint32)newfd >= context->table_size
440 		|| context->fds[oldfd] == NULL
441 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
442 		mutex_unlock(&context->io_mutex);
443 		return B_FILE_ERROR;
444 	}
445 
446 	// Check for identity. Note that this check cannot be done above,
447 	// because we always want to return an error on invalid
448 	// handles.
449 	select_info* selectInfos = NULL;
450 	if (oldfd != newfd) {
451 		// Now do the work
452 		TFD(Dup2FD(context, oldfd, newfd));
453 
454 		evicted = context->fds[newfd];
455 		selectInfos = context->select_infos[newfd];
456 		context->select_infos[newfd] = NULL;
457 		atomic_add(&context->fds[oldfd]->ref_count, 1);
458 		atomic_add(&context->fds[oldfd]->open_count, 1);
459 		context->fds[newfd] = context->fds[oldfd];
460 
461 		if (evicted == NULL)
462 			context->num_used_fds++;
463 	}
464 
465 	fd_set_close_on_exec(context, newfd, false);
466 
467 	mutex_unlock(&context->io_mutex);
468 
469 	// Say bye bye to the evicted fd
470 	if (evicted) {
471 		deselect_select_infos(evicted, selectInfos, true);
472 		close_fd(context, evicted);
473 		put_fd(evicted);
474 	}
475 
476 	return newfd;
477 }
478 
479 
480 /*!	Duplicates an FD from another team to this/the kernel team.
481 	\param fromTeam The team which owns the FD.
482 	\param fd The FD to duplicate.
483 	\param kernel If \c true, the new FD will be created in the kernel team,
484 			the current userland team otherwise.
485 	\return The newly created FD or an error code, if something went wrong.
486 */
487 int
488 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
489 {
490 	// get the I/O context for the team in question
491 	Team* team = Team::Get(fromTeam);
492 	if (team == NULL)
493 		return B_BAD_TEAM_ID;
494 	BReference<Team> teamReference(team, true);
495 
496 	io_context* fromContext = team->io_context;
497 
498 	// get the file descriptor
499 	file_descriptor* descriptor = get_fd(fromContext, fd);
500 	if (descriptor == NULL)
501 		return B_FILE_ERROR;
502 	DescriptorPutter descriptorPutter(descriptor);
503 
504 	// create a new FD in the target I/O context
505 	int result = new_fd(get_current_io_context(kernel), descriptor);
506 	if (result >= 0) {
507 		// the descriptor reference belongs to the slot, now
508 		descriptorPutter.Detach();
509 	}
510 
511 	return result;
512 }
513 
514 
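/*!	Common backend for the ioctl() calls: resolves the FD in the kernel or
	userland I/O context and forwards the request to the descriptor's
	\c fd_ioctl hook, mapping \c B_DEV_INVALID_IOCTL to \c ENOTTY.
*/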
515 static status_t
516 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
517 {
518 	struct file_descriptor* descriptor;
519 	int status;
520 
521 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
522 	if (descriptor == NULL)
523 		return B_FILE_ERROR;
524 
525 	if (descriptor->ops->fd_ioctl)
526 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
527 	else
528 		status = B_DEV_INVALID_IOCTL;
529 
530 	if (status == B_DEV_INVALID_IOCTL)
531 		status = ENOTTY;
532 
533 	put_fd(descriptor);
534 	return status;
535 }
536 
537 
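/*!	Deselects all events of the given select_info list from the descriptor,
	notifies the waiters with \c B_EVENT_INVALID, and optionally puts the
	associated sync objects.
*/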
538 static void
539 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
540 	bool putSyncObjects)
541 {
542 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
543 
544 	select_info* info = infos;
545 	while (info != NULL) {
546 		select_sync* sync = info->sync;
547 
548 		// deselect the selected events
549 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
550 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
551 			for (uint16 event = 1; event < 16; event++) {
552 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
553 					descriptor->ops->fd_deselect(descriptor, event,
554 						(selectsync*)info);
555 				}
556 			}
557 		}
558 
559 		notify_select_events(info, B_EVENT_INVALID);
560 		info = info->next;
561 
562 		if (putSyncObjects)
563 			put_select_sync(sync);
564 	}
565 }
566 
567 
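/*!	Selects the requested events on the given FD and registers the
	select_info with the I/O context. If the descriptor does not support
	select(), the requested events (minus the output-only flags) are
	notified immediately.
*/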
568 status_t
569 select_fd(int32 fd, struct select_info* info, bool kernel)
570 {
571 	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
572 		info->sync, info->selected_events));
573 
574 	FDGetter fdGetter;
575 		// define before the context locker, so it will be destroyed after it
576 
577 	io_context* context = get_current_io_context(kernel);
578 	MutexLocker locker(context->io_mutex);
579 
580 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
581 	if (descriptor == NULL)
582 		return B_FILE_ERROR;
583 
584 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
585 
586 	if (descriptor->ops->fd_select == NULL) {
587 		// if the I/O subsystem doesn't support select(), we will
588 		// immediately notify the select call
589 		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
590 		if (eventsToSelect != 0)
591 			return notify_select_events(info, eventsToSelect);
592 		else
593 			return B_OK;
594 	}
595 
596 	// We need the FD to stay open while we're doing this, so that select()/
597 	// deselect() won't be called on it after it has been closed.
598 	atomic_add(&descriptor->open_count, 1);
599 
600 	locker.Unlock();
601 
602 	// select any events asked for
603 	uint32 selectedEvents = 0;
604 
605 	for (uint16 event = 1; event < 16; event++) {
606 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
607 			&& descriptor->ops->fd_select(descriptor, event,
608 				(selectsync*)info) == B_OK) {
609 			selectedEvents |= SELECT_FLAG(event);
610 		}
611 	}
612 	info->selected_events = selectedEvents
613 		| (info->selected_events & B_EVENT_INVALID);
614 
615 	// Add the info to the IO context, even if nothing has been selected -- we
616 	// always support B_EVENT_INVALID.
617 	locker.Lock();
618 	if (context->fds[fd] != descriptor) {
619 		// Someone close()d the index in the meantime. deselect() all
620 		// events.
621 		info->next = NULL;
622 		deselect_select_infos(descriptor, info, false);
623 
624 		// Release our open reference of the descriptor.
625 		close_fd(context, descriptor);
626 		return B_FILE_ERROR;
627 	}
628 
629 	// The FD index hasn't changed, so we add the select info to the table.
630 
631 	info->next = context->select_infos[fd];
632 	context->select_infos[fd] = info;
633 
634 	// As long as the info is in the list, we keep a reference to the sync
635 	// object.
636 	atomic_add(&info->sync->ref_count, 1);
637 
638 	// Finally release our open reference. It is safe just to decrement,
639 	// since as long as the descriptor is associated with the slot,
640 	// someone else still has it open.
641 	atomic_add(&descriptor->open_count, -1);
642 
643 	return B_OK;
644 }
645 
646 
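/*!	Removes the given select_info from the I/O context again and deselects
	its events from the descriptor. The counterpart of select_fd().
*/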
647 status_t
648 deselect_fd(int32 fd, struct select_info* info, bool kernel)
649 {
650 	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
651 		info->sync, info->selected_events));
652 
653 	FDGetter fdGetter;
654 		// define before the context locker, so it will be destroyed after it
655 
656 	io_context* context = get_current_io_context(kernel);
657 	MutexLocker locker(context->io_mutex);
658 
659 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
660 	if (descriptor == NULL)
661 		return B_FILE_ERROR;
662 
663 	// remove the info from the IO context
664 
665 	select_info** infoLocation = &context->select_infos[fd];
666 	while (*infoLocation != NULL && *infoLocation != info)
667 		infoLocation = &(*infoLocation)->next;
668 
669 	// If not found, someone else beat us to it.
670 	if (*infoLocation != info)
671 		return B_OK;
672 
673 	*infoLocation = info->next;
674 
675 	locker.Unlock();
676 
677 	// deselect the selected events
678 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
679 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
680 		for (uint16 event = 1; event < 16; event++) {
681 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
682 				descriptor->ops->fd_deselect(descriptor, event,
683 					(selectsync*)info);
684 			}
685 		}
686 	}
687 
688 	put_select_sync(info->sync);
689 
690 	return B_OK;
691 }
692 
693 
694 /*!	This function checks if the specified fd is valid in the current
695 	context. It can be used for a quick check; the fd is not locked
696 	so it could become invalid immediately after this check.
697 */
698 bool
699 fd_is_valid(int fd, bool kernel)
700 {
701 	struct file_descriptor* descriptor
702 		= get_fd(get_current_io_context(kernel), fd);
703 	if (descriptor == NULL)
704 		return false;
705 
706 	put_fd(descriptor);
707 	return true;
708 }
709 
710 
711 struct vnode*
712 fd_vnode(struct file_descriptor* descriptor)
713 {
714 	switch (descriptor->type) {
715 		case FDTYPE_FILE:
716 		case FDTYPE_DIR:
717 		case FDTYPE_ATTR_DIR:
718 		case FDTYPE_ATTR:
719 			return descriptor->u.vnode;
720 	}
721 
722 	return NULL;
723 }
724 
725 
726 static status_t
727 common_close(int fd, bool kernel)
728 {
729 	return close_fd_index(get_current_io_context(kernel), fd);
730 }
731 
732 
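/*!	Common backend for _user_read() and _user_write(): checks the open mode
	and the user buffer, performs the I/O at \a pos (or at the current
	position if \a pos is -1), and advances the descriptor's position if
	needed.
*/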
733 static ssize_t
734 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
735 {
736 	if (pos < -1)
737 		return B_BAD_VALUE;
738 
739 	FDGetter fdGetter;
740 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
741 	if (!descriptor)
742 		return B_FILE_ERROR;
743 
744 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
745 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
746 		return B_FILE_ERROR;
747 	}
748 
749 	bool movePosition = false;
750 	if (pos == -1) {
751 		pos = descriptor->pos;
752 		movePosition = true;
753 	}
754 
755 	if (write ? descriptor->ops->fd_write == NULL
756 			: descriptor->ops->fd_read == NULL) {
757 		return B_BAD_VALUE;
758 	}
759 
760 	if (length == 0)
761 		return 0;
762 
763 	if (!is_user_address_range(buffer, length))
764 		return B_BAD_ADDRESS;
765 
766 	SyscallRestartWrapper<status_t> status;
767 
768 	if (write)
769 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
770 	else
771 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
772 
773 	if (status != B_OK)
774 		return status;
775 
776 	if (movePosition) {
777 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
778 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos + length;
779 	}
780 
781 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
782 }
783 
784 
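/*!	Vectored counterpart of common_user_io(), backing _user_readv() and
	_user_writev(): copies the iovecs from userland and performs the I/O
	vector by vector, returning the total number of bytes transferred.
*/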
785 static ssize_t
786 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
787 	bool write)
788 {
789 	if (pos < -1)
790 		return B_BAD_VALUE;
791 
792 	iovec* vecs;
793 	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
794 	if (error != B_OK)
795 		return error;
796 	MemoryDeleter _(vecs);
797 
798 	FDGetter fdGetter;
799 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
800 	if (!descriptor)
801 		return B_FILE_ERROR;
802 
803 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
804 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
805 		return B_FILE_ERROR;
806 	}
807 
808 	bool movePosition = false;
809 	if (pos == -1) {
810 		pos = descriptor->pos;
811 		movePosition = true;
812 	}
813 
814 	if (write ? descriptor->ops->fd_write == NULL
815 			: descriptor->ops->fd_read == NULL) {
816 		return B_BAD_VALUE;
817 	}
818 
819 	SyscallRestartWrapper<status_t> status;
820 
821 	ssize_t bytesTransferred = 0;
822 	for (size_t i = 0; i < count; i++) {
823 		if (vecs[i].iov_base == NULL)
824 			continue;
825 
826 		size_t length = vecs[i].iov_len;
827 		if (write) {
828 			status = descriptor->ops->fd_write(descriptor, pos,
829 				vecs[i].iov_base, &length);
830 		} else {
831 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
832 				&length);
833 		}
834 
835 		if (status != B_OK) {
836 			if (bytesTransferred == 0)
837 				return status;
838 			status = B_OK;
839 			break;
840 		}
841 
842 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
843 			bytesTransferred = SSIZE_MAX;
844 		else
845 			bytesTransferred += (ssize_t)length;
846 
847 		pos += length;
848 
849 		if (length < vecs[i].iov_len)
850 			break;
851 	}
852 
853 	if (movePosition) {
854 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
855 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos;
856 	}
857 
858 	return bytesTransferred;
859 }
860 
861 
862 status_t
863 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
864 {
865 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
866 
867 	return fd_ioctl(false, fd, op, buffer, length);
868 }
869 
870 
871 //	#pragma mark - User syscalls
872 
873 
874 ssize_t
875 _user_read(int fd, off_t pos, void* buffer, size_t length)
876 {
877 	return common_user_io(fd, pos, buffer, length, false);
878 }
879 
880 
881 ssize_t
882 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
883 {
884 	return common_user_vector_io(fd, pos, userVecs, count, false);
885 }
886 
887 
888 ssize_t
889 _user_write(int fd, off_t pos, const void* buffer, size_t length)
890 {
891 	return common_user_io(fd, pos, (void*)buffer, length, true);
892 }
893 
894 
895 ssize_t
896 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
897 {
898 	return common_user_vector_io(fd, pos, userVecs, count, true);
899 }
900 
901 
902 off_t
903 _user_seek(int fd, off_t pos, int seekType)
904 {
905 	syscall_64_bit_return_value();
906 
907 	struct file_descriptor* descriptor;
908 
909 	descriptor = get_fd(get_current_io_context(false), fd);
910 	if (!descriptor)
911 		return B_FILE_ERROR;
912 
913 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
914 
915 	if (descriptor->ops->fd_seek)
916 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
917 	else
918 		pos = ESPIPE;
919 
920 	put_fd(descriptor);
921 	return pos;
922 }
923 
924 
925 status_t
926 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
927 {
928 	TRACE(("user_ioctl: fd %d\n", fd));
929 
930 	// "buffer" is not always a pointer depending on "op", so we cannot
931 	// check that it is a userland buffer here. Instead we check that
932 	// it is at least not within the bounds of kernel memory; in the
933 	// cases where it is a numeric constant, it is usually a small value.
934 	if (IS_KERNEL_ADDRESS(buffer))
935 		return B_BAD_ADDRESS;
936 
937 	SyscallRestartWrapper<status_t> status;
938 
939 	return status = fd_ioctl(false, fd, op, buffer, length);
940 }
941 
942 
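/*!	Reads up to \a maxCount directory entries into a kernel buffer of at
	most \c kMaxReadDirBufferSize bytes and copies them back to
	\a userBuffer. Returns the number of entries read or an error code.
*/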
943 ssize_t
944 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
945 	uint32 maxCount)
946 {
947 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
948 		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));
949 
950 	if (maxCount == 0)
951 		return 0;
952 
953 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
954 		return B_BAD_ADDRESS;
955 
956 	// get I/O context and FD
957 	io_context* ioContext = get_current_io_context(false);
958 	FDGetter fdGetter;
959 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
960 	if (descriptor == NULL)
961 		return B_FILE_ERROR;
962 
963 	if (descriptor->ops->fd_read_dir == NULL)
964 		return B_UNSUPPORTED;
965 
966 	// restrict buffer size and allocate a heap buffer
967 	if (bufferSize > kMaxReadDirBufferSize)
968 		bufferSize = kMaxReadDirBufferSize;
969 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
970 	if (buffer == NULL)
971 		return B_NO_MEMORY;
972 	MemoryDeleter bufferDeleter(buffer);
973 
974 	// read the directory
975 	uint32 count = maxCount;
976 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
977 		buffer, bufferSize, &count);
978 	if (status != B_OK)
979 		return status;
980 
981 	ASSERT(count <= maxCount);
982 
983 	// copy the buffer back -- determine the total buffer size first
984 	size_t sizeToCopy = 0;
985 	BytePointer<struct dirent> entry = buffer;
986 	for (uint32 i = 0; i < count; i++) {
987 		size_t length = entry->d_reclen;
988 		sizeToCopy += length;
989 		entry += length;
990 	}
991 
992 	ASSERT(sizeToCopy <= bufferSize);
993 
994 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
995 		return B_BAD_ADDRESS;
996 
997 	return count;
998 }
999 
1000 
1001 status_t
1002 _user_rewind_dir(int fd)
1003 {
1004 	struct file_descriptor* descriptor;
1005 	status_t status;
1006 
1007 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
1008 
1009 	descriptor = get_fd(get_current_io_context(false), fd);
1010 	if (descriptor == NULL)
1011 		return B_FILE_ERROR;
1012 
1013 	if (descriptor->ops->fd_rewind_dir)
1014 		status = descriptor->ops->fd_rewind_dir(descriptor);
1015 	else
1016 		status = B_UNSUPPORTED;
1017 
1018 	put_fd(descriptor);
1019 	return status;
1020 }
1021 
1022 
1023 status_t
1024 _user_close(int fd)
1025 {
1026 	return common_close(fd, false);
1027 }
1028 
1029 
1030 int
1031 _user_dup(int fd)
1032 {
1033 	return dup_fd(fd, false);
1034 }
1035 
1036 
1037 int
1038 _user_dup2(int ofd, int nfd)
1039 {
1040 	return dup2_fd(ofd, nfd, false);
1041 }
1042 
1043 
1044 //	#pragma mark - Kernel calls
1045 
1046 
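/*!	Kernel-internal counterpart of _user_read(): reads from the FD of the
	kernel I/O context into \a buffer without userland address checks.
	A \a pos of -1 means the current position, which is advanced on success.
*/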
1047 ssize_t
1048 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1049 {
1050 	if (pos < -1)
1051 		return B_BAD_VALUE;
1052 
1053 	FDGetter fdGetter;
1054 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1055 
1056 	if (!descriptor)
1057 		return B_FILE_ERROR;
1058 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1059 		return B_FILE_ERROR;
1060 
1061 	bool movePosition = false;
1062 	if (pos == -1) {
1063 		pos = descriptor->pos;
1064 		movePosition = true;
1065 	}
1066 
1067 	SyscallFlagUnsetter _;
1068 
1069 	if (descriptor->ops->fd_read == NULL)
1070 		return B_BAD_VALUE;
1071 
1072 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1073 		&length);
1074 	if (bytesRead >= B_OK) {
1075 		if (length > SSIZE_MAX)
1076 			bytesRead = SSIZE_MAX;
1077 		else
1078 			bytesRead = (ssize_t)length;
1079 
1080 		if (movePosition)
1081 			descriptor->pos = pos + length;
1082 	}
1083 
1084 	return bytesRead;
1085 }
1086 
1087 
1088 ssize_t
1089 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1090 {
1091 	bool movePosition = false;
1092 	status_t status;
1093 
1094 	if (pos < -1)
1095 		return B_BAD_VALUE;
1096 
1097 	FDGetter fdGetter;
1098 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1099 
1100 	if (!descriptor)
1101 		return B_FILE_ERROR;
1102 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1103 		return B_FILE_ERROR;
1104 
1105 	if (pos == -1) {
1106 		pos = descriptor->pos;
1107 		movePosition = true;
1108 	}
1109 
1110 	if (descriptor->ops->fd_read == NULL)
1111 		return B_BAD_VALUE;
1112 
1113 	SyscallFlagUnsetter _;
1114 
1115 	ssize_t bytesRead = 0;
1116 
1117 	for (size_t i = 0; i < count; i++) {
1118 		size_t length = vecs[i].iov_len;
1119 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1120 			&length);
1121 		if (status != B_OK) {
1122 			bytesRead = status;
1123 			break;
1124 		}
1125 
1126 		if ((uint64)bytesRead + length > SSIZE_MAX)
1127 			bytesRead = SSIZE_MAX;
1128 		else
1129 			bytesRead += (ssize_t)length;
1130 
1131 		pos += vecs[i].iov_len;
1132 	}
1133 
1134 	if (movePosition)
1135 		descriptor->pos = pos;
1136 
1137 	return bytesRead;
1138 }
1139 
1140 
1141 ssize_t
1142 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1143 {
1144 	if (pos < -1)
1145 		return B_BAD_VALUE;
1146 
1147 	FDGetter fdGetter;
1148 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1149 
1150 	if (descriptor == NULL)
1151 		return B_FILE_ERROR;
1152 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1153 		return B_FILE_ERROR;
1154 
1155 	bool movePosition = false;
1156 	if (pos == -1) {
1157 		pos = descriptor->pos;
1158 		movePosition = true;
1159 	}
1160 
1161 	if (descriptor->ops->fd_write == NULL)
1162 		return B_BAD_VALUE;
1163 
1164 	SyscallFlagUnsetter _;
1165 
1166 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1167 		&length);
1168 	if (bytesWritten >= B_OK) {
1169 		if (length > SSIZE_MAX)
1170 			bytesWritten = SSIZE_MAX;
1171 		else
1172 			bytesWritten = (ssize_t)length;
1173 
1174 		if (movePosition)
1175 			descriptor->pos = pos + length;
1176 	}
1177 
1178 	return bytesWritten;
1179 }
1180 
1181 
1182 ssize_t
1183 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1184 {
1185 	bool movePosition = false;
1186 	status_t status;
1187 
1188 	if (pos < -1)
1189 		return B_BAD_VALUE;
1190 
1191 	FDGetter fdGetter;
1192 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1193 
1194 	if (!descriptor)
1195 		return B_FILE_ERROR;
1196 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1197 		return B_FILE_ERROR;
1198 
1199 	if (pos == -1) {
1200 		pos = descriptor->pos;
1201 		movePosition = true;
1202 	}
1203 
1204 	if (descriptor->ops->fd_write == NULL)
1205 		return B_BAD_VALUE;
1206 
1207 	SyscallFlagUnsetter _;
1208 
1209 	ssize_t bytesWritten = 0;
1210 
1211 	for (size_t i = 0; i < count; i++) {
1212 		size_t length = vecs[i].iov_len;
1213 		status = descriptor->ops->fd_write(descriptor, pos,
1214 			vecs[i].iov_base, &length);
1215 		if (status != B_OK) {
1216 			bytesWritten = status;
1217 			break;
1218 		}
1219 
1220 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1221 			bytesWritten = SSIZE_MAX;
1222 		else
1223 			bytesWritten += (ssize_t)length;
1224 
1225 		pos += vecs[i].iov_len;
1226 	}
1227 
1228 	if (movePosition)
1229 		descriptor->pos = pos;
1230 
1231 	return bytesWritten;
1232 }
1233 
1234 
1235 off_t
1236 _kern_seek(int fd, off_t pos, int seekType)
1237 {
1238 	struct file_descriptor* descriptor;
1239 
1240 	descriptor = get_fd(get_current_io_context(true), fd);
1241 	if (!descriptor)
1242 		return B_FILE_ERROR;
1243 
1244 	if (descriptor->ops->fd_seek)
1245 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1246 	else
1247 		pos = ESPIPE;
1248 
1249 	put_fd(descriptor);
1250 	return pos;
1251 }
1252 
1253 
1254 status_t
1255 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1256 {
1257 	TRACE(("kern_ioctl: fd %d\n", fd));
1258 
1259 	SyscallFlagUnsetter _;
1260 
1261 	return fd_ioctl(true, fd, op, buffer, length);
1262 }
1263 
1264 
1265 ssize_t
1266 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1267 	uint32 maxCount)
1268 {
1269 	struct file_descriptor* descriptor;
1270 	ssize_t retval;
1271 
1272 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1273 		"%" B_PRIu32 ")\n",fd, buffer, bufferSize, maxCount));
1274 
1275 	struct io_context* ioContext = get_current_io_context(true);
1276 	descriptor = get_fd(ioContext, fd);
1277 	if (descriptor == NULL)
1278 		return B_FILE_ERROR;
1279 
1280 	if (descriptor->ops->fd_read_dir) {
1281 		uint32 count = maxCount;
1282 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1283 			bufferSize, &count);
1284 		if (retval >= 0)
1285 			retval = count;
1286 	} else
1287 		retval = B_UNSUPPORTED;
1288 
1289 	put_fd(descriptor);
1290 	return retval;
1291 }
1292 
1293 
1294 status_t
1295 _kern_rewind_dir(int fd)
1296 {
1297 	struct file_descriptor* descriptor;
1298 	status_t status;
1299 
1300 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1301 
1302 	descriptor = get_fd(get_current_io_context(true), fd);
1303 	if (descriptor == NULL)
1304 		return B_FILE_ERROR;
1305 
1306 	if (descriptor->ops->fd_rewind_dir)
1307 		status = descriptor->ops->fd_rewind_dir(descriptor);
1308 	else
1309 		status = B_UNSUPPORTED;
1310 
1311 	put_fd(descriptor);
1312 	return status;
1313 }
1314 
1315 
1316 status_t
1317 _kern_close(int fd)
1318 {
1319 	return common_close(fd, true);
1320 }
1321 
1322 
1323 int
1324 _kern_dup(int fd)
1325 {
1326 	return dup_fd(fd, true);
1327 }
1328 
1329 
1330 int
1331 _kern_dup2(int ofd, int nfd)
1332 {
1333 	return dup2_fd(ofd, nfd, true);
1334 }
1335 
1336