xref: /haiku/src/system/kernel/fs/fd.cpp (revision e705c841d784f0035a0ef3e9e96f6e017df16681)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include <syscalls.h>
21 #include <syscall_restart.h>
22 #include <util/AutoLock.h>
23 #include <vfs.h>
24 #include <wait_for_objects.h>
25 
26 #include "vfs_tracing.h"
27 
28 
29 //#define TRACE_FD
30 #ifdef TRACE_FD
31 #	define TRACE(x) dprintf x
32 #else
33 #	define TRACE(x)
34 #endif
35 
36 
37 static const size_t kMaxReadDirBufferSize = 64 * 1024;
38 
39 
40 static struct file_descriptor* get_fd_locked(struct io_context* context,
41 	int fd);
42 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
43 static void deselect_select_infos(file_descriptor* descriptor,
44 	select_info* infos, bool putSyncObjects);
45 
46 
47 struct FDGetterLocking {
48 	inline bool Lock(file_descriptor* /*lockable*/)
49 	{
50 		return false;
51 	}
52 
53 	inline void Unlock(file_descriptor* lockable)
54 	{
55 		put_fd(lockable);
56 	}
57 };
58 
59 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
60 public:
61 	inline FDGetter()
62 		: AutoLocker<file_descriptor, FDGetterLocking>()
63 	{
64 	}
65 
66 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
67 		: AutoLocker<file_descriptor, FDGetterLocking>(
68 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
69 	{
70 	}
71 
72 	inline file_descriptor* SetTo(io_context* context, int fd,
73 		bool contextLocked = false)
74 	{
75 		file_descriptor* descriptor
76 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
77 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
78 		return descriptor;
79 	}
80 
81 	inline file_descriptor* SetTo(int fd, bool kernel,
82 		bool contextLocked = false)
83 	{
84 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
85 	}
86 
87 	inline file_descriptor* FD() const
88 	{
89 		return fLockable;
90 	}
91 };
92 
93 
94 //	#pragma mark - General fd routines
95 
96 
97 #ifdef DEBUG
98 void dump_fd(int fd, struct file_descriptor* descriptor);
99 
100 void
101 dump_fd(int fd, struct file_descriptor* descriptor)
102 {
103 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
104 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
105 		", pos = %" B_PRId64 "\n",
106 		fd, descriptor, descriptor->type, descriptor->ref_count,
107 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
108 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
109 }
110 #endif
111 
112 
113 /*! Allocates and initializes a new file_descriptor.
114 */
115 struct file_descriptor*
116 alloc_fd(void)
117 {
118 	file_descriptor* descriptor
119 		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
120 	if (descriptor == NULL)
121 		return NULL;
122 
123 	descriptor->u.vnode = NULL;
124 	descriptor->cookie = NULL;
125 	descriptor->ref_count = 1;
126 	descriptor->open_count = 0;
127 	descriptor->open_mode = 0;
128 	descriptor->pos = 0;
129 
130 	return descriptor;
131 }
132 
133 
134 bool
135 fd_close_on_exec(struct io_context* context, int fd)
136 {
137 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
138 }
139 
140 
141 void
142 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
143 {
144 	if (closeFD)
145 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
146 	else
147 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
148 }
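/* A worked example of the bitmap above (illustrative only): fd 10 lives in
 * byte fds_close_on_exec[1] (10 / 8 == 1) at bit 2 (10 & 7 == 2), so
 * fd_set_close_on_exec(context, 10, true) sets exactly that bit, and
 * fd_close_on_exec(context, 10) tests it.
 */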
149 
150 
151 /*!	Searches for a free slot in the FD table of the provided I/O context, and
152 	inserts the specified descriptor into it.
153 */
154 int
155 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
156 	int firstIndex)
157 {
158 	int fd = -1;
159 	uint32 i;
160 
161 	mutex_lock(&context->io_mutex);
162 
163 	for (i = firstIndex; i < context->table_size; i++) {
164 		if (!context->fds[i]) {
165 			fd = i;
166 			break;
167 		}
168 	}
169 	if (fd < 0) {
170 		fd = B_NO_MORE_FDS;
171 		goto err;
172 	}
173 
174 	TFD(NewFD(context, fd, descriptor));
175 
176 	context->fds[fd] = descriptor;
177 	context->num_used_fds++;
178 	atomic_add(&descriptor->open_count, 1);
179 
180 err:
181 	mutex_unlock(&context->io_mutex);
182 
183 	return fd;
184 }
185 
186 
187 int
188 new_fd(struct io_context* context, struct file_descriptor* descriptor)
189 {
190 	return new_fd_etc(context, descriptor, 0);
191 }
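/* A minimal usage sketch (hypothetical, for illustration only): how an open
 * path could publish a freshly allocated descriptor. The ops table
 * "sExampleOps" and "exampleCookie" are assumptions, not part of this file.
 *
 *	struct file_descriptor* descriptor = alloc_fd();
 *	if (descriptor == NULL)
 *		return B_NO_MEMORY;
 *
 *	descriptor->type = FDTYPE_FILE;
 *	descriptor->ops = &sExampleOps;
 *	descriptor->cookie = exampleCookie;
 *	descriptor->open_mode = O_RDONLY;
 *
 *	int fd = new_fd(get_current_io_context(false), descriptor);
 *	if (fd < 0) {
 *		put_fd(descriptor);
 *			// drops the initial reference and frees the descriptor
 *		return fd;
 *	}
 *	return fd;
 */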
192 
193 
194 /*!	Decrements the descriptor's reference counter, and frees all of its
195 	resources once the last reference is gone.
196 */
197 void
198 put_fd(struct file_descriptor* descriptor)
199 {
200 	int32 previous = atomic_add(&descriptor->ref_count, -1);
201 
202 	TFD(PutFD(descriptor));
203 
204 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
205 		descriptor, descriptor->ref_count, descriptor->cookie));
206 
207 	// free the descriptor if we don't need it anymore
208 	if (previous == 1) {
209 		// free the underlying object
210 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
211 			descriptor->ops->fd_free(descriptor);
212 
213 		free(descriptor);
214 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
215 		&& previous - 1 == descriptor->open_count
216 		&& descriptor->ops != NULL) {
217 		// the descriptor has been disconnected - it cannot
218 		// be accessed anymore, let's close it (no one is
219 		// currently accessing this descriptor)
220 
221 		if (descriptor->ops->fd_close)
222 			descriptor->ops->fd_close(descriptor);
223 		if (descriptor->ops->fd_free)
224 			descriptor->ops->fd_free(descriptor);
225 
226 		// prevent this descriptor from being closed/freed again
227 		descriptor->ops = NULL;
228 		descriptor->u.vnode = NULL;
229 
230 		// the file descriptor is kept intact, so that it's not
231 		// reused until someone explicitly closes it
232 	}
233 }
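/* Reference counting sketch (hypothetical): every successful get_fd() must be
 * balanced by exactly one put_fd() once the caller is done, e.g.:
 *
 *	struct file_descriptor* descriptor
 *		= get_fd(get_current_io_context(false), fd);
 *	if (descriptor == NULL)
 *		return B_FILE_ERROR;
 *
 *	// ... use descriptor->ops, descriptor->cookie, etc. ...
 *
 *	put_fd(descriptor);
 */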
234 
235 
236 /*!	Decrements the open counter of the file descriptor and invokes
237 	its close hook when appropriate.
238 */
239 void
240 close_fd(struct io_context* context, struct file_descriptor* descriptor)
241 {
242 	// POSIX advisory locks need to be released when any file descriptor closes
243 	if (descriptor->type == FDTYPE_FILE)
244 		vfs_release_posix_lock(context, descriptor);
245 
246 	if (atomic_add(&descriptor->open_count, -1) == 1) {
247 		vfs_unlock_vnode_if_locked(descriptor);
248 
249 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
250 			descriptor->ops->fd_close(descriptor);
251 	}
252 }
253 
254 
255 status_t
256 close_fd_index(struct io_context* context, int fd)
257 {
258 	struct file_descriptor* descriptor = remove_fd(context, fd);
259 
260 	if (descriptor == NULL)
261 		return B_FILE_ERROR;
262 
263 	close_fd(context, descriptor);
264 	put_fd(descriptor);
265 		// the reference associated with the slot
266 
267 	return B_OK;
268 }
269 
270 
271 /*!	This descriptor's underlying object will be closed and freed as soon as
272 	possible (in one of the next calls to put_fd()); get_fd() will no longer
273 	succeed on this descriptor.
274 	This is useful if the underlying object is gone, for instance when a
275 	(mounted) volume has been removed unexpectedly.
276 */
277 void
278 disconnect_fd(struct file_descriptor* descriptor)
279 {
280 	descriptor->open_mode |= O_DISCONNECTED;
281 }
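/* Usage sketch (hypothetical): a caller that notices a volume has vanished
 * could disconnect every descriptor still referring to it, so that get_fd()
 * starts failing and the final put_fd() performs the cleanup. "staleMount"
 * and the u.mount comparison are assumptions for illustration.
 *
 *	MutexLocker _(context->io_mutex);
 *	for (uint32 i = 0; i < context->table_size; i++) {
 *		struct file_descriptor* descriptor = context->fds[i];
 *		if (descriptor != NULL && descriptor->u.mount == staleMount)
 *			disconnect_fd(descriptor);
 *	}
 */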
282 
283 
284 void
285 inc_fd_ref_count(struct file_descriptor* descriptor)
286 {
287 	atomic_add(&descriptor->ref_count, 1);
288 }
289 
290 
291 static struct file_descriptor*
292 get_fd_locked(struct io_context* context, int fd)
293 {
294 	if (fd < 0 || (uint32)fd >= context->table_size)
295 		return NULL;
296 
297 	struct file_descriptor* descriptor = context->fds[fd];
298 
299 	if (descriptor != NULL) {
300 		// disconnected descriptors cannot be accessed anymore
301 		if (descriptor->open_mode & O_DISCONNECTED)
302 			return NULL;
303 
304 		TFD(GetFD(context, fd, descriptor));
305 		inc_fd_ref_count(descriptor);
306 	}
307 
308 	return descriptor;
309 }
310 
311 
312 struct file_descriptor*
313 get_fd(struct io_context* context, int fd)
314 {
315 	MutexLocker _(context->io_mutex);
316 
317 	return get_fd_locked(context, fd);
318 }
319 
320 
321 struct file_descriptor*
322 get_open_fd(struct io_context* context, int fd)
323 {
324 	MutexLocker _(context->io_mutex);
325 
326 	file_descriptor* descriptor = get_fd_locked(context, fd);
327 	if (descriptor == NULL)
328 		return NULL;
329 
330 	atomic_add(&descriptor->open_count, 1);
331 
332 	return descriptor;
333 }
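/* Usage sketch (hypothetical): get_open_fd() takes a reference and an open
 * count, so a caller that wants the FD to stay open across a long-running
 * operation has to undo both afterwards:
 *
 *	struct file_descriptor* descriptor = get_open_fd(context, fd);
 *	if (descriptor == NULL)
 *		return B_FILE_ERROR;
 *
 *	// ... operation that must not race with a concurrent close() ...
 *
 *	close_fd(context, descriptor);
 *	put_fd(descriptor);
 */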
334 
335 
336 /*!	Removes the file descriptor from the specified slot.
337 */
338 static struct file_descriptor*
339 remove_fd(struct io_context* context, int fd)
340 {
341 	struct file_descriptor* descriptor = NULL;
342 
343 	if (fd < 0)
344 		return NULL;
345 
346 	mutex_lock(&context->io_mutex);
347 
348 	if ((uint32)fd < context->table_size)
349 		descriptor = context->fds[fd];
350 
351 	select_info* selectInfos = NULL;
352 	bool disconnected = false;
353 
354 	if (descriptor != NULL) {
355 		// fd is valid
356 		TFD(RemoveFD(context, fd, descriptor));
357 
358 		context->fds[fd] = NULL;
359 		fd_set_close_on_exec(context, fd, false);
360 		context->num_used_fds--;
361 
362 		selectInfos = context->select_infos[fd];
363 		context->select_infos[fd] = NULL;
364 
365 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
366 	}
367 
368 	mutex_unlock(&context->io_mutex);
369 
370 	if (selectInfos != NULL)
371 		deselect_select_infos(descriptor, selectInfos, true);
372 
373 	return disconnected ? NULL : descriptor;
374 }
375 
376 
377 static int
378 dup_fd(int fd, bool kernel)
379 {
380 	struct io_context* context = get_current_io_context(kernel);
381 	struct file_descriptor* descriptor;
382 	int status;
383 
384 	TRACE(("dup_fd: fd = %d\n", fd));
385 
386 	// Try to get the fd structure
387 	descriptor = get_fd(context, fd);
388 	if (descriptor == NULL)
389 		return B_FILE_ERROR;
390 
391 	// now put the fd in place
392 	status = new_fd(context, descriptor);
393 	if (status < 0)
394 		put_fd(descriptor);
395 	else {
396 		mutex_lock(&context->io_mutex);
397 		fd_set_close_on_exec(context, status, false);
398 		mutex_unlock(&context->io_mutex);
399 	}
400 
401 	return status;
402 }
403 
404 
405 /*!	POSIX says this should be the same as:
406 		close(newfd);
407 		fcntl(oldfd, F_DUPFD, newfd);
408 
409 	We implement dup2() as a single atomic operation to be thread-safe.
410 */
411 static int
412 dup2_fd(int oldfd, int newfd, bool kernel)
413 {
414 	struct file_descriptor* evicted = NULL;
415 	struct io_context* context;
416 
417 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
418 
419 	// quick check
420 	if (oldfd < 0 || newfd < 0)
421 		return B_FILE_ERROR;
422 
423 	// Get current I/O context and lock it
424 	context = get_current_io_context(kernel);
425 	mutex_lock(&context->io_mutex);
426 
427 	// Check if the fds are valid (mutex must be locked because
428 	// the table size could be changed)
429 	if ((uint32)oldfd >= context->table_size
430 		|| (uint32)newfd >= context->table_size
431 		|| context->fds[oldfd] == NULL
432 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
433 		mutex_unlock(&context->io_mutex);
434 		return B_FILE_ERROR;
435 	}
436 
437 	// Check for identity; note that this check cannot be done
438 	// earlier, because we always want to return an error on
439 	// invalid handles
440 	select_info* selectInfos = NULL;
441 	if (oldfd != newfd) {
442 		// Now do the work
443 		TFD(Dup2FD(context, oldfd, newfd));
444 
445 		evicted = context->fds[newfd];
446 		selectInfos = context->select_infos[newfd];
447 		context->select_infos[newfd] = NULL;
448 		atomic_add(&context->fds[oldfd]->ref_count, 1);
449 		atomic_add(&context->fds[oldfd]->open_count, 1);
450 		context->fds[newfd] = context->fds[oldfd];
451 
452 		if (evicted == NULL)
453 			context->num_used_fds++;
454 	}
455 
456 	fd_set_close_on_exec(context, newfd, false);
457 
458 	mutex_unlock(&context->io_mutex);
459 
460 	// Say bye bye to the evicted fd
461 	if (evicted) {
462 		deselect_select_infos(evicted, selectInfos, true);
463 		close_fd(context, evicted);
464 		put_fd(evicted);
465 	}
466 
467 	return newfd;
468 }
469 
470 
471 /*!	Duplicates an FD from another team into the current or the kernel team.
472 	\param fromTeam The team which owns the FD.
473 	\param fd The FD to duplicate.
474 	\param kernel If \c true, the new FD will be created in the kernel team,
475 			otherwise in the current userland team.
476 	\return The newly created FD, or an error code if something went wrong.
477 */
478 int
479 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
480 {
481 	// get the I/O context for the team in question
482 	Team* team = Team::Get(fromTeam);
483 	if (team == NULL)
484 		return B_BAD_TEAM_ID;
485 	BReference<Team> teamReference(team, true);
486 
487 	io_context* fromContext = team->io_context;
488 
489 	// get the file descriptor
490 	file_descriptor* descriptor = get_fd(fromContext, fd);
491 	if (descriptor == NULL)
492 		return B_FILE_ERROR;
493 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
494 
495 	// create a new FD in the target I/O context
496 	int result = new_fd(get_current_io_context(kernel), descriptor);
497 	if (result >= 0) {
498 		// the descriptor reference now belongs to the slot
499 		descriptorPutter.Detach();
500 	}
501 
502 	return result;
503 }
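/* Usage sketch (hypothetical): pulling another team's FD into the kernel
 * team, e.g. so a kernel component can keep using the file after the
 * originating syscall returns. "sourceTeam" and "userFD" are placeholders.
 *
 *	int kernelFD = dup_foreign_fd(sourceTeam, userFD, true);
 *	if (kernelFD < 0)
 *		return kernelFD;
 *
 *	// ... access the file via _kern_read()/_kern_write() ...
 *
 *	_kern_close(kernelFD);
 */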
504 
505 
506 static status_t
507 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
508 {
509 	struct file_descriptor* descriptor;
510 	int status;
511 
512 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
513 	if (descriptor == NULL)
514 		return B_FILE_ERROR;
515 
516 	if (descriptor->ops->fd_ioctl)
517 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
518 	else
519 		status = B_DEV_INVALID_IOCTL;
520 
521 	if (status == B_DEV_INVALID_IOCTL)
522 		status = ENOTTY;
523 
524 	put_fd(descriptor);
525 	return status;
526 }
527 
528 
529 static void
530 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
531 	bool putSyncObjects)
532 {
533 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
534 
535 	select_info* info = infos;
536 	while (info != NULL) {
537 		select_sync* sync = info->sync;
538 
539 		// deselect the selected events
540 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
541 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
542 			for (uint16 event = 1; event < 16; event++) {
543 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
544 					descriptor->ops->fd_deselect(descriptor, event,
545 						(selectsync*)info);
546 				}
547 			}
548 		}
549 
550 		notify_select_events(info, B_EVENT_INVALID);
551 		info = info->next;
552 
553 		if (putSyncObjects)
554 			put_select_sync(sync);
555 	}
556 }
557 
558 
559 status_t
560 select_fd(int32 fd, struct select_info* info, bool kernel)
561 {
562 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
563 		info->sync, info->selected_events));
564 
565 	FDGetter fdGetter;
566 		// define before the context locker, so it will be destroyed after it
567 
568 	io_context* context = get_current_io_context(kernel);
569 	MutexLocker locker(context->io_mutex);
570 
571 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
572 	if (descriptor == NULL)
573 		return B_FILE_ERROR;
574 
575 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
576 
577 	if (descriptor->ops->fd_select == NULL) {
578 		// if the I/O subsystem doesn't support select(), we will
579 		// immediately notify the select call
580 		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
581 		if (eventsToSelect != 0)
582 			return notify_select_events(info, eventsToSelect);
583 		else
584 			return B_OK;
585 	}
586 
587 	// We need the FD to stay open while we're doing this, so that no select()
588 	// or deselect() will be called on it after it has been closed.
589 	atomic_add(&descriptor->open_count, 1);
590 
591 	locker.Unlock();
592 
593 	// select any events asked for
594 	uint32 selectedEvents = 0;
595 
596 	for (uint16 event = 1; event < 16; event++) {
597 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
598 			&& descriptor->ops->fd_select(descriptor, event,
599 				(selectsync*)info) == B_OK) {
600 			selectedEvents |= SELECT_FLAG(event);
601 		}
602 	}
603 	info->selected_events = selectedEvents
604 		| (info->selected_events & B_EVENT_INVALID);
605 
606 	// Add the info to the I/O context even if nothing has been selected --
607 	// we always support B_EVENT_INVALID.
608 	locker.Lock();
609 	if (context->fds[fd] != descriptor) {
610 		// Someone close()d the index in the meantime. deselect() all
611 		// events.
612 		info->next = NULL;
613 		deselect_select_infos(descriptor, info, false);
614 
615 		// Release our open reference of the descriptor.
616 		close_fd(context, descriptor);
617 		return B_FILE_ERROR;
618 	}
619 
620 	// The FD index hasn't changed, so we add the select info to the table.
621 
622 	info->next = context->select_infos[fd];
623 	context->select_infos[fd] = info;
624 
625 	// As long as the info is in the list, we keep a reference to the sync
626 	// object.
627 	atomic_add(&info->sync->ref_count, 1);
628 
629 	// Finally release our open reference. It is safe just to decrement,
630 	// since as long as the descriptor is associated with the slot,
631 	// someone else still has it open.
632 	atomic_add(&descriptor->open_count, -1);
633 
634 	return B_OK;
635 }
636 
637 
638 status_t
639 deselect_fd(int32 fd, struct select_info* info, bool kernel)
640 {
641 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
642 		info->sync, info->selected_events));
643 
644 	FDGetter fdGetter;
645 		// define before the context locker, so it will be destroyed after it
646 
647 	io_context* context = get_current_io_context(kernel);
648 	MutexLocker locker(context->io_mutex);
649 
650 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
651 	if (descriptor == NULL)
652 		return B_FILE_ERROR;
653 
654 	// remove the info from the IO context
655 
656 	select_info** infoLocation = &context->select_infos[fd];
657 	while (*infoLocation != NULL && *infoLocation != info)
658 		infoLocation = &(*infoLocation)->next;
659 
660 	// If not found, someone else beat us to it.
661 	if (*infoLocation != info)
662 		return B_OK;
663 
664 	*infoLocation = info->next;
665 
666 	locker.Unlock();
667 
668 	// deselect the selected events
669 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
670 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
671 		for (uint16 event = 1; event < 16; event++) {
672 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
673 				descriptor->ops->fd_deselect(descriptor, event,
674 					(selectsync*)info);
675 			}
676 		}
677 	}
678 
679 	put_select_sync(info->sync);
680 
681 	return B_OK;
682 }
683 
684 
685 /*!	This function checks if the specified fd is valid in the current
686 	context. It can be used for a quick check; the fd is not locked
687 	so it could become invalid immediately after this check.
688 */
689 bool
690 fd_is_valid(int fd, bool kernel)
691 {
692 	struct file_descriptor* descriptor
693 		= get_fd(get_current_io_context(kernel), fd);
694 	if (descriptor == NULL)
695 		return false;
696 
697 	put_fd(descriptor);
698 	return true;
699 }
700 
701 
702 struct vnode*
703 fd_vnode(struct file_descriptor* descriptor)
704 {
705 	switch (descriptor->type) {
706 		case FDTYPE_FILE:
707 		case FDTYPE_DIR:
708 		case FDTYPE_ATTR_DIR:
709 		case FDTYPE_ATTR:
710 			return descriptor->u.vnode;
711 	}
712 
713 	return NULL;
714 }
715 
716 
717 static status_t
718 common_close(int fd, bool kernel)
719 {
720 	return close_fd_index(get_current_io_context(kernel), fd);
721 }
722 
723 
724 static ssize_t
725 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
726 {
727 	if (!IS_USER_ADDRESS(buffer))
728 		return B_BAD_ADDRESS;
729 
730 	if (pos < -1)
731 		return B_BAD_VALUE;
732 
733 	FDGetter fdGetter;
734 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
735 	if (!descriptor)
736 		return B_FILE_ERROR;
737 
738 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
739 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
740 		return B_FILE_ERROR;
741 	}
742 
743 	bool movePosition = false;
744 	if (pos == -1) {
745 		pos = descriptor->pos;
746 		movePosition = true;
747 	}
748 
749 	if (write ? descriptor->ops->fd_write == NULL
750 			: descriptor->ops->fd_read == NULL) {
751 		return B_BAD_VALUE;
752 	}
753 
754 	SyscallRestartWrapper<status_t> status;
755 
756 	if (write)
757 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
758 	else
759 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
760 
761 	if (status != B_OK)
762 		return status;
763 
764 	if (movePosition)
765 		descriptor->pos = pos + length;
766 
767 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
768 }
769 
770 
771 static ssize_t
772 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
773 	bool write)
774 {
775 	if (!IS_USER_ADDRESS(userVecs))
776 		return B_BAD_ADDRESS;
777 
778 	if (pos < -1)
779 		return B_BAD_VALUE;
780 
781 	// prevent integer overflow exploit in malloc()
782 	if (count > IOV_MAX)
783 		return B_BAD_VALUE;
784 
785 	FDGetter fdGetter;
786 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
787 	if (!descriptor)
788 		return B_FILE_ERROR;
789 
790 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
791 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
792 		return B_FILE_ERROR;
793 	}
794 
795 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
796 	if (vecs == NULL)
797 		return B_NO_MEMORY;
798 	MemoryDeleter _(vecs);
799 
800 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
801 		return B_BAD_ADDRESS;
802 
803 	bool movePosition = false;
804 	if (pos == -1) {
805 		pos = descriptor->pos;
806 		movePosition = true;
807 	}
808 
809 	if (write ? descriptor->ops->fd_write == NULL
810 			: descriptor->ops->fd_read == NULL) {
811 		return B_BAD_VALUE;
812 	}
813 
814 	SyscallRestartWrapper<status_t> status;
815 
816 	ssize_t bytesTransferred = 0;
817 	for (uint32 i = 0; i < count; i++) {
818 		size_t length = vecs[i].iov_len;
819 		if (write) {
820 			status = descriptor->ops->fd_write(descriptor, pos,
821 				vecs[i].iov_base, &length);
822 		} else {
823 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
824 				&length);
825 		}
826 
827 		if (status != B_OK) {
828 			if (bytesTransferred == 0)
829 				return status;
830 			status = B_OK;
831 			break;
832 		}
833 
834 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
835 			bytesTransferred = SSIZE_MAX;
836 		else
837 			bytesTransferred += (ssize_t)length;
838 
839 		pos += length;
840 
841 		if (length < vecs[i].iov_len)
842 			break;
843 	}
844 
845 	if (movePosition)
846 		descriptor->pos = pos;
847 
848 	return bytesTransferred;
849 }
850 
851 
852 status_t
853 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
854 {
855 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
856 
857 	return fd_ioctl(false, fd, op, buffer, length);
858 }
859 
860 
861 //	#pragma mark - User syscalls
862 
863 
864 ssize_t
865 _user_read(int fd, off_t pos, void* buffer, size_t length)
866 {
867 	return common_user_io(fd, pos, buffer, length, false);
868 }
869 
870 
871 ssize_t
872 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
873 {
874 	return common_user_vector_io(fd, pos, userVecs, count, false);
875 }
876 
877 
878 ssize_t
879 _user_write(int fd, off_t pos, const void* buffer, size_t length)
880 {
881 	return common_user_io(fd, pos, (void*)buffer, length, true);
882 }
883 
884 
885 ssize_t
886 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
887 {
888 	return common_user_vector_io(fd, pos, userVecs, count, true);
889 }
890 
891 
892 off_t
893 _user_seek(int fd, off_t pos, int seekType)
894 {
895 	syscall_64_bit_return_value();
896 
897 	struct file_descriptor* descriptor;
898 
899 	descriptor = get_fd(get_current_io_context(false), fd);
900 	if (!descriptor)
901 		return B_FILE_ERROR;
902 
903 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
904 
905 	if (descriptor->ops->fd_seek)
906 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
907 	else
908 		pos = ESPIPE;
909 
910 	put_fd(descriptor);
911 	return pos;
912 }
913 
914 
915 status_t
916 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
917 {
918 	if (!IS_USER_ADDRESS(buffer))
919 		return B_BAD_ADDRESS;
920 
921 	TRACE(("user_ioctl: fd %d\n", fd));
922 
923 	SyscallRestartWrapper<status_t> status;
924 
925 	return status = fd_ioctl(false, fd, op, buffer, length);
926 }
927 
928 
929 ssize_t
930 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
931 	uint32 maxCount)
932 {
933 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
934 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
935 
936 	if (maxCount == 0)
937 		return 0;
938 
939 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
940 		return B_BAD_ADDRESS;
941 
942 	// get I/O context and FD
943 	io_context* ioContext = get_current_io_context(false);
944 	FDGetter fdGetter;
945 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
946 	if (descriptor == NULL)
947 		return B_FILE_ERROR;
948 
949 	if (descriptor->ops->fd_read_dir == NULL)
950 		return B_UNSUPPORTED;
951 
952 	// restrict buffer size and allocate a heap buffer
953 	if (bufferSize > kMaxReadDirBufferSize)
954 		bufferSize = kMaxReadDirBufferSize;
955 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
956 	if (buffer == NULL)
957 		return B_NO_MEMORY;
958 	MemoryDeleter bufferDeleter(buffer);
959 
960 	// read the directory
961 	uint32 count = maxCount;
962 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
963 		buffer, bufferSize, &count);
964 	if (status != B_OK)
965 		return status;
966 
967 	// copy the buffer back -- determine the total buffer size first
968 	size_t sizeToCopy = 0;
969 	struct dirent* entry = buffer;
970 	for (uint32 i = 0; i < count; i++) {
971 		size_t length = entry->d_reclen;
972 		sizeToCopy += length;
973 		entry = (struct dirent*)((uint8*)entry + length);
974 	}
975 
976 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
977 		return B_BAD_ADDRESS;
978 
979 	return count;
980 }
981 
982 
983 status_t
984 _user_rewind_dir(int fd)
985 {
986 	struct file_descriptor* descriptor;
987 	status_t status;
988 
989 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
990 
991 	descriptor = get_fd(get_current_io_context(false), fd);
992 	if (descriptor == NULL)
993 		return B_FILE_ERROR;
994 
995 	if (descriptor->ops->fd_rewind_dir)
996 		status = descriptor->ops->fd_rewind_dir(descriptor);
997 	else
998 		status = B_UNSUPPORTED;
999 
1000 	put_fd(descriptor);
1001 	return status;
1002 }
1003 
1004 
1005 status_t
1006 _user_close(int fd)
1007 {
1008 	return common_close(fd, false);
1009 }
1010 
1011 
1012 int
1013 _user_dup(int fd)
1014 {
1015 	return dup_fd(fd, false);
1016 }
1017 
1018 
1019 int
1020 _user_dup2(int ofd, int nfd)
1021 {
1022 	return dup2_fd(ofd, nfd, false);
1023 }
1024 
1025 
1026 //	#pragma mark - Kernel calls
1027 
1028 
1029 ssize_t
1030 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1031 {
1032 	if (pos < -1)
1033 		return B_BAD_VALUE;
1034 
1035 	FDGetter fdGetter;
1036 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1037 
1038 	if (!descriptor)
1039 		return B_FILE_ERROR;
1040 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1041 		return B_FILE_ERROR;
1042 
1043 	bool movePosition = false;
1044 	if (pos == -1) {
1045 		pos = descriptor->pos;
1046 		movePosition = true;
1047 	}
1048 
1049 	SyscallFlagUnsetter _;
1050 
1051 	if (descriptor->ops->fd_read == NULL)
1052 		return B_BAD_VALUE;
1053 
1054 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1055 		&length);
1056 	if (bytesRead >= B_OK) {
1057 		if (length > SSIZE_MAX)
1058 			bytesRead = SSIZE_MAX;
1059 		else
1060 			bytesRead = (ssize_t)length;
1061 
1062 		if (movePosition)
1063 			descriptor->pos = pos + length;
1064 	}
1065 
1066 	return bytesRead;
1067 }
1068 
1069 
1070 ssize_t
1071 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1072 {
1073 	bool movePosition = false;
1074 	status_t status;
1075 	uint32 i;
1076 
1077 	if (pos < -1)
1078 		return B_BAD_VALUE;
1079 
1080 	FDGetter fdGetter;
1081 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1082 
1083 	if (!descriptor)
1084 		return B_FILE_ERROR;
1085 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1086 		return B_FILE_ERROR;
1087 
1088 	if (pos == -1) {
1089 		pos = descriptor->pos;
1090 		movePosition = true;
1091 	}
1092 
1093 	if (descriptor->ops->fd_read == NULL)
1094 		return B_BAD_VALUE;
1095 
1096 	SyscallFlagUnsetter _;
1097 
1098 	ssize_t bytesRead = 0;
1099 
1100 	for (i = 0; i < count; i++) {
1101 		size_t length = vecs[i].iov_len;
1102 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1103 			&length);
1104 		if (status != B_OK) {
1105 			bytesRead = status;
1106 			break;
1107 		}
1108 
1109 		if ((uint64)bytesRead + length > SSIZE_MAX)
1110 			bytesRead = SSIZE_MAX;
1111 		else
1112 			bytesRead += (ssize_t)length;
1113 
1114 		pos += vecs[i].iov_len;
1115 	}
1116 
1117 	if (movePosition)
1118 		descriptor->pos = pos;
1119 
1120 	return bytesRead;
1121 }
1122 
1123 
1124 ssize_t
1125 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1126 {
1127 	if (pos < -1)
1128 		return B_BAD_VALUE;
1129 
1130 	FDGetter fdGetter;
1131 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1132 
1133 	if (descriptor == NULL)
1134 		return B_FILE_ERROR;
1135 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1136 		return B_FILE_ERROR;
1137 
1138 	bool movePosition = false;
1139 	if (pos == -1) {
1140 		pos = descriptor->pos;
1141 		movePosition = true;
1142 	}
1143 
1144 	if (descriptor->ops->fd_write == NULL)
1145 		return B_BAD_VALUE;
1146 
1147 	SyscallFlagUnsetter _;
1148 
1149 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1150 		&length);
1151 	if (bytesWritten >= B_OK) {
1152 		if (length > SSIZE_MAX)
1153 			bytesWritten = SSIZE_MAX;
1154 		else
1155 			bytesWritten = (ssize_t)length;
1156 
1157 		if (movePosition)
1158 			descriptor->pos = pos + length;
1159 	}
1160 
1161 	return bytesWritten;
1162 }
1163 
1164 
1165 ssize_t
1166 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1167 {
1168 	bool movePosition = false;
1169 	status_t status;
1170 	uint32 i;
1171 
1172 	if (pos < -1)
1173 		return B_BAD_VALUE;
1174 
1175 	FDGetter fdGetter;
1176 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1177 
1178 	if (!descriptor)
1179 		return B_FILE_ERROR;
1180 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1181 		return B_FILE_ERROR;
1182 
1183 	if (pos == -1) {
1184 		pos = descriptor->pos;
1185 		movePosition = true;
1186 	}
1187 
1188 	if (descriptor->ops->fd_write == NULL)
1189 		return B_BAD_VALUE;
1190 
1191 	SyscallFlagUnsetter _;
1192 
1193 	ssize_t bytesWritten = 0;
1194 
1195 	for (i = 0; i < count; i++) {
1196 		size_t length = vecs[i].iov_len;
1197 		status = descriptor->ops->fd_write(descriptor, pos,
1198 			vecs[i].iov_base, &length);
1199 		if (status != B_OK) {
1200 			bytesWritten = status;
1201 			break;
1202 		}
1203 
1204 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1205 			bytesWritten = SSIZE_MAX;
1206 		else
1207 			bytesWritten += (ssize_t)length;
1208 
1209 		pos += vecs[i].iov_len;
1210 	}
1211 
1212 	if (movePosition)
1213 		descriptor->pos = pos;
1214 
1215 	return bytesWritten;
1216 }
1217 
1218 
1219 off_t
1220 _kern_seek(int fd, off_t pos, int seekType)
1221 {
1222 	struct file_descriptor* descriptor;
1223 
1224 	descriptor = get_fd(get_current_io_context(true), fd);
1225 	if (!descriptor)
1226 		return B_FILE_ERROR;
1227 
1228 	if (descriptor->ops->fd_seek)
1229 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1230 	else
1231 		pos = ESPIPE;
1232 
1233 	put_fd(descriptor);
1234 	return pos;
1235 }
1236 
1237 
1238 status_t
1239 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1240 {
1241 	TRACE(("kern_ioctl: fd %d\n", fd));
1242 
1243 	SyscallFlagUnsetter _;
1244 
1245 	return fd_ioctl(true, fd, op, buffer, length);
1246 }
1247 
1248 
1249 ssize_t
1250 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1251 	uint32 maxCount)
1252 {
1253 	struct file_descriptor* descriptor;
1254 	ssize_t retval;
1255 
1256 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1257 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1258 
1259 	struct io_context* ioContext = get_current_io_context(true);
1260 	descriptor = get_fd(ioContext, fd);
1261 	if (descriptor == NULL)
1262 		return B_FILE_ERROR;
1263 
1264 	if (descriptor->ops->fd_read_dir) {
1265 		uint32 count = maxCount;
1266 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1267 			bufferSize, &count);
1268 		if (retval >= 0)
1269 			retval = count;
1270 	} else
1271 		retval = B_UNSUPPORTED;
1272 
1273 	put_fd(descriptor);
1274 	return retval;
1275 }
1276 
1277 
1278 status_t
1279 _kern_rewind_dir(int fd)
1280 {
1281 	struct file_descriptor* descriptor;
1282 	status_t status;
1283 
1284 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1285 
1286 	descriptor = get_fd(get_current_io_context(true), fd);
1287 	if (descriptor == NULL)
1288 		return B_FILE_ERROR;
1289 
1290 	if (descriptor->ops->fd_rewind_dir)
1291 		status = descriptor->ops->fd_rewind_dir(descriptor);
1292 	else
1293 		status = B_UNSUPPORTED;
1294 
1295 	put_fd(descriptor);
1296 	return status;
1297 }
1298 
1299 
1300 status_t
1301 _kern_close(int fd)
1302 {
1303 	return common_close(fd, true);
1304 }
1305 
1306 
1307 int
1308 _kern_dup(int fd)
1309 {
1310 	return dup_fd(fd, true);
1311 }
1312 
1313 
1314 int
1315 _kern_dup2(int ofd, int nfd)
1316 {
1317 	return dup2_fd(ofd, nfd, true);
1318 }
1319 
1320