xref: /haiku/src/system/kernel/fs/fd.cpp (revision d374a27286b8a52974a97dba0d5966ea026a665d)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include <syscalls.h>
21 #include <syscall_restart.h>
22 #include <util/AutoLock.h>
23 #include <vfs.h>
24 #include <wait_for_objects.h>
25 
26 #include "vfs_tracing.h"
27 
28 
29 //#define TRACE_FD
30 #ifdef TRACE_FD
31 #	define TRACE(x) dprintf x
32 #else
33 #	define TRACE(x)
34 #endif
35 
36 
37 static const size_t kMaxReadDirBufferSize = 64 * 1024;
38 
39 
40 static struct file_descriptor* get_fd_locked(struct io_context* context,
41 	int fd);
42 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
43 static void deselect_select_infos(file_descriptor* descriptor,
44 	select_info* infos);
45 
46 
47 struct FDGetterLocking {
48 	inline bool Lock(file_descriptor* /*lockable*/)
49 	{
50 		return false;
51 	}
52 
53 	inline void Unlock(file_descriptor* lockable)
54 	{
55 		put_fd(lockable);
56 	}
57 };
58 
59 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
60 public:
61 	inline FDGetter()
62 		: AutoLocker<file_descriptor, FDGetterLocking>()
63 	{
64 	}
65 
66 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
67 		: AutoLocker<file_descriptor, FDGetterLocking>(
68 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
69 	{
70 	}
71 
72 	inline file_descriptor* SetTo(io_context* context, int fd,
73 		bool contextLocked = false)
74 	{
75 		file_descriptor* descriptor
76 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
77 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
78 		return descriptor;
79 	}
80 
81 	inline file_descriptor* SetTo(int fd, bool kernel,
82 		bool contextLocked = false)
83 	{
84 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
85 	}
86 
87 	inline file_descriptor* FD() const
88 	{
89 		return fLockable;
90 	}
91 };
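// FDGetter is a small RAII helper around get_fd()/put_fd(): SetTo() (or the
// two-argument constructor) acquires a reference to the descriptor, and the
// destructor releases it via put_fd(), so early returns cannot leak the
// reference. A minimal usage sketch ("context" and "fd" are placeholders):
//
//	FDGetter fdGetter;
//	file_descriptor* descriptor = fdGetter.SetTo(context, fd, false);
//	if (descriptor == NULL)
//		return B_FILE_ERROR;
//	// ... use descriptor; put_fd() runs when fdGetter goes out of scope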
92 
93 
94 //	#pragma mark - General fd routines
95 
96 
97 #ifdef DEBUG
98 void dump_fd(int fd, struct file_descriptor* descriptor);
99 
100 void
101 dump_fd(int fd,struct file_descriptor* descriptor)
102 {
103 	dprintf("fd[%d] = %p: type = %ld, ref_count = %ld, ops = %p, u.vnode = %p, "
104 		"u.mount = %p, cookie = %p, open_mode = %lx, pos = %Ld\n",
105 		fd, descriptor, descriptor->type, descriptor->ref_count,
106 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
107 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
108 }
109 #endif
110 
111 
112 /*! Allocates and initializes a new file_descriptor.
113 */
114 struct file_descriptor*
115 alloc_fd(void)
116 {
117 	file_descriptor* descriptor
118 		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
119 	if (descriptor == NULL)
120 		return NULL;
121 
122 	descriptor->u.vnode = NULL;
123 	descriptor->cookie = NULL;
124 	descriptor->ref_count = 1;
125 	descriptor->open_count = 0;
126 	descriptor->open_mode = 0;
127 	descriptor->pos = 0;
128 
129 	return descriptor;
130 }
131 
132 
133 bool
134 fd_close_on_exec(struct io_context* context, int fd)
135 {
136 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
137 }
138 
139 
140 void
141 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
142 {
143 	if (closeFD)
144 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
145 	else
146 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
147 }
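// The close-on-exec flags are kept as a bitmap with one bit per FD:
// fd / 8 selects the byte in fds_close_on_exec, fd & 7 the bit within that
// byte. For example, fd 10 lives in byte 1, bit 2.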
148 
149 
150 /*!	Searches for a free slot in the FD table of the provided I/O context, and
151 	inserts the specified descriptor into it.
152 */
153 int
154 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
155 	int firstIndex)
156 {
157 	int fd = -1;
158 	uint32 i;
159 
160 	mutex_lock(&context->io_mutex);
161 
162 	for (i = firstIndex; i < context->table_size; i++) {
163 		if (!context->fds[i]) {
164 			fd = i;
165 			break;
166 		}
167 	}
168 	if (fd < 0) {
169 		fd = B_NO_MORE_FDS;
170 		goto err;
171 	}
172 
173 	TFD(NewFD(context, fd, descriptor));
174 
175 	context->fds[fd] = descriptor;
176 	context->num_used_fds++;
177 	atomic_add(&descriptor->open_count, 1);
178 
179 err:
180 	mutex_unlock(&context->io_mutex);
181 
182 	return fd;
183 }
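// The firstIndex parameter lets a caller leave the lowest slots untouched;
// for instance, a (hypothetical) caller could pass 3 to skip the slots
// conventionally used for stdin/stdout/stderr. new_fd() below is simply the
// common case with firstIndex == 0.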
184 
185 
186 int
187 new_fd(struct io_context* context, struct file_descriptor* descriptor)
188 {
189 	return new_fd_etc(context, descriptor, 0);
190 }
191 
192 
193 /*!	Reduces the descriptor's reference counter, and frees all resources
194 	when it's no longer used.
195 */
196 void
197 put_fd(struct file_descriptor* descriptor)
198 {
199 	int32 previous = atomic_add(&descriptor->ref_count, -1);
200 
201 	TFD(PutFD(descriptor));
202 
203 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
204 		descriptor, descriptor->ref_count, descriptor->cookie));
205 
206 	// free the descriptor if we don't need it anymore
207 	if (previous == 1) {
208 		// free the underlying object
209 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
210 			descriptor->ops->fd_free(descriptor);
211 
212 		free(descriptor);
213 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
214 		&& previous - 1 == descriptor->open_count
215 		&& descriptor->ops != NULL) {
216 		// the descriptor has been disconnected - it cannot
217 		// be accessed anymore, let's close it (no one is
218 		// currently accessing this descriptor)
219 
220 		if (descriptor->ops->fd_close)
221 			descriptor->ops->fd_close(descriptor);
222 		if (descriptor->ops->fd_free)
223 			descriptor->ops->fd_free(descriptor);
224 
225 		// prevent this descriptor from being closed/freed again
226 		descriptor->open_count = -1;
227 		descriptor->ref_count = -1;
228 		descriptor->ops = NULL;
229 		descriptor->u.vnode = NULL;
230 
231 		// the file descriptor is kept intact, so that it's not
232 		// reused until someone explicitly closes it
233 	}
234 }
235 
236 
237 /*!	Decrements the open counter of the file descriptor and invokes
238 	its close hook when appropriate.
239 */
240 void
241 close_fd(struct file_descriptor* descriptor)
242 {
243 	if (atomic_add(&descriptor->open_count, -1) == 1) {
244 		vfs_unlock_vnode_if_locked(descriptor);
245 
246 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
247 			descriptor->ops->fd_close(descriptor);
248 	}
249 }
250 
251 
252 status_t
253 close_fd_index(struct io_context* context, int fd)
254 {
255 	struct file_descriptor* descriptor = remove_fd(context, fd);
256 
257 	if (descriptor == NULL)
258 		return B_FILE_ERROR;
259 
260 	close_fd(descriptor);
261 	put_fd(descriptor);
262 		// the reference associated with the slot
263 
264 	return B_OK;
265 }
266 
267 
268 /*!	This descriptor's underlying object will be closed and freed as soon as
269 	possible (in one of the next calls to put_fd() - get_fd() will no longer
270 	succeed on this descriptor).
271 	This is useful if the underlying object is gone, for instance when a
272 	(mounted) volume has been removed unexpectedly.
273 */
274 void
275 disconnect_fd(struct file_descriptor* descriptor)
276 {
277 	descriptor->open_mode |= O_DISCONNECTED;
278 }
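// Once O_DISCONNECTED is set, get_fd_locked() no longer hands out the
// descriptor, and put_fd() closes and frees the underlying object as soon as
// only the open references remain; the descriptor structure itself is kept
// around so that the FD slot is not reused until it is explicitly closed
// (see put_fd() above).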
279 
280 
281 void
282 inc_fd_ref_count(struct file_descriptor* descriptor)
283 {
284 	atomic_add(&descriptor->ref_count, 1);
285 }
286 
287 
288 static struct file_descriptor*
289 get_fd_locked(struct io_context* context, int fd)
290 {
291 	if (fd < 0 || (uint32)fd >= context->table_size)
292 		return NULL;
293 
294 	struct file_descriptor* descriptor = context->fds[fd];
295 
296 	if (descriptor != NULL) {
297 		// Disconnected descriptors cannot be accessed anymore
298 		if (descriptor->open_mode & O_DISCONNECTED)
299 			descriptor = NULL;
300 		else {
301 			TFD(GetFD(context, fd, descriptor));
302 			inc_fd_ref_count(descriptor);
303 		}
304 	}
305 
306 	return descriptor;
307 }
308 
309 
310 struct file_descriptor*
311 get_fd(struct io_context* context, int fd)
312 {
313 	MutexLocker _(context->io_mutex);
314 
315 	return get_fd_locked(context, fd);
316 }
317 
318 
319 struct file_descriptor*
320 get_open_fd(struct io_context* context, int fd)
321 {
322 	MutexLocker _(context->io_mutex);
323 
324 	file_descriptor* descriptor = get_fd_locked(context, fd);
325 	if (descriptor == NULL)
326 		return NULL;
327 
328 	atomic_add(&descriptor->open_count, 1);
329 
330 	return descriptor;
331 }
332 
333 
334 /*!	Removes the file descriptor from the specified slot.
335 */
336 static struct file_descriptor*
337 remove_fd(struct io_context* context, int fd)
338 {
339 	struct file_descriptor* descriptor = NULL;
340 
341 	if (fd < 0)
342 		return NULL;
343 
344 	mutex_lock(&context->io_mutex);
345 
346 	if ((uint32)fd < context->table_size)
347 		descriptor = context->fds[fd];
348 
349 	select_info* selectInfos = NULL;
350 	bool disconnected = false;
351 
352 	if (descriptor != NULL)	{
353 		// fd is valid
354 		TFD(RemoveFD(context, fd, descriptor));
355 
356 		context->fds[fd] = NULL;
357 		fd_set_close_on_exec(context, fd, false);
358 		context->num_used_fds--;
359 
360 		selectInfos = context->select_infos[fd];
361 		context->select_infos[fd] = NULL;
362 
363 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
364 	}
365 
366 	mutex_unlock(&context->io_mutex);
367 
368 	if (selectInfos != NULL)
369 		deselect_select_infos(descriptor, selectInfos);
370 
371 	return disconnected ? NULL : descriptor;
372 }
373 
374 
375 static int
376 dup_fd(int fd, bool kernel)
377 {
378 	struct io_context* context = get_current_io_context(kernel);
379 	struct file_descriptor* descriptor;
380 	int status;
381 
382 	TRACE(("dup_fd: fd = %d\n", fd));
383 
384 	// Try to get the fd structure
385 	descriptor = get_fd(context, fd);
386 	if (descriptor == NULL)
387 		return B_FILE_ERROR;
388 
389 	// now put the fd in place
390 	status = new_fd(context, descriptor);
391 	if (status < 0)
392 		put_fd(descriptor);
393 	else {
394 		mutex_lock(&context->io_mutex);
395 		fd_set_close_on_exec(context, status, false);
396 		mutex_unlock(&context->io_mutex);
397 	}
398 
399 	return status;
400 }
401 
402 
403 /*!	POSIX says this should be the same as:
404 		close(newfd);
405 		fcntl(oldfd, F_DUPFD, newfd);
406 
407 	We do dup2() directly to be thread-safe.
408 */
409 static int
410 dup2_fd(int oldfd, int newfd, bool kernel)
411 {
412 	struct file_descriptor* evicted = NULL;
413 	struct io_context* context;
414 
415 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
416 
417 	// quick check
418 	if (oldfd < 0 || newfd < 0)
419 		return B_FILE_ERROR;
420 
421 	// Get current I/O context and lock it
422 	context = get_current_io_context(kernel);
423 	mutex_lock(&context->io_mutex);
424 
425 	// Check if the fds are valid (mutex must be locked because
426 	// the table size could be changed)
427 	if ((uint32)oldfd >= context->table_size
428 		|| (uint32)newfd >= context->table_size
429 		|| context->fds[oldfd] == NULL) {
430 		mutex_unlock(&context->io_mutex);
431 		return B_FILE_ERROR;
432 	}
433 
434 	// Check for identity; this check cannot be done above, because we
435 	// always want to return an error for invalid handles, even when
436 	// oldfd == newfd
437 	select_info* selectInfos = NULL;
438 	if (oldfd != newfd) {
439 		// Now do the work
440 		TFD(Dup2FD(context, oldfd, newfd));
441 
442 		evicted = context->fds[newfd];
443 		selectInfos = context->select_infos[newfd];
444 		context->select_infos[newfd] = NULL;
445 		atomic_add(&context->fds[oldfd]->ref_count, 1);
446 		atomic_add(&context->fds[oldfd]->open_count, 1);
447 		context->fds[newfd] = context->fds[oldfd];
448 
449 		if (evicted == NULL)
450 			context->num_used_fds++;
451 	}
452 
453 	fd_set_close_on_exec(context, newfd, false);
454 
455 	mutex_unlock(&context->io_mutex);
456 
457 	// Say bye bye to the evicted fd
458 	if (evicted) {
459 		deselect_select_infos(evicted, selectInfos);
460 		close_fd(evicted);
461 		put_fd(evicted);
462 	}
463 
464 	return newfd;
465 }
466 
467 
468 /*!	Duplicates an FD from another team to this/the kernel team.
469 	\param fromTeam The team which owns the FD.
470 	\param fd The FD to duplicate.
471 	\param kernel If \c true, the new FD will be created in the kernel team,
472 			the current userland team otherwise.
473 	\return The newly created FD or an error code, if something went wrong.
474 */
475 int
476 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
477 {
478 	// get the I/O context for the team in question
479 	InterruptsSpinLocker teamsLocker(gTeamSpinlock);
480 	Team* team = team_get_team_struct_locked(fromTeam);
481 	if (team == NULL)
482 		return B_BAD_TEAM_ID;
483 
484 	io_context* fromContext = team->io_context;
485 	vfs_get_io_context(fromContext);
486 
487 	teamsLocker.Unlock();
488 
489 	CObjectDeleter<io_context> _(fromContext, vfs_put_io_context);
490 
491 	// get the file descriptor
492 	file_descriptor* descriptor = get_fd(fromContext, fd);
493 	if (descriptor == NULL)
494 		return B_FILE_ERROR;
495 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
496 
497 	// create a new FD in the target I/O context
498 	int result = new_fd(get_current_io_context(kernel), descriptor);
499 	if (result >= 0) {
500 		// the descriptor reference now belongs to the slot
501 		descriptorPutter.Detach();
502 	}
503 
504 	return result;
505 }
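// A minimal (hypothetical) caller duplicating a user team's FD into the
// kernel team and cleaning it up again might look like this:
//
//	int localFD = dup_foreign_fd(someTeam, theirFD, true);
//	if (localFD < 0)
//		return localFD;
//	// ... use localFD, e.g. via the _kern_*() functions below ...
//	_kern_close(localFD);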
506 
507 
508 static status_t
509 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
510 {
511 	struct file_descriptor* descriptor;
512 	int status;
513 
514 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
515 	if (descriptor == NULL)
516 		return B_FILE_ERROR;
517 
518 	if (descriptor->ops->fd_ioctl)
519 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
520 	else
521 		status = B_DEV_INVALID_IOCTL;
522 
523 	if (status == B_DEV_INVALID_IOCTL)
524 		status = ENOTTY;
525 
526 	put_fd(descriptor);
527 	return status;
528 }
529 
530 
531 static void
532 deselect_select_infos(file_descriptor* descriptor, select_info* infos)
533 {
534 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
535 
536 	select_info* info = infos;
537 	while (info != NULL) {
538 		select_sync* sync = info->sync;
539 
540 		// deselect the selected events
541 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
542 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
543 			for (uint16 event = 1; event < 16; event++) {
544 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
545 					descriptor->ops->fd_deselect(descriptor, event,
546 						(selectsync*)info);
547 				}
548 			}
549 		}
550 
551 		notify_select_events(info, B_EVENT_INVALID);
552 		info = info->next;
553 		put_select_sync(sync);
554 	}
555 }
556 
557 
558 status_t
559 select_fd(int32 fd, struct select_info* info, bool kernel)
560 {
561 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
562 		info->sync, info->selected_events));
563 
564 	FDGetter fdGetter;
565 		// define before the context locker, so it will be destroyed after it
566 
567 	io_context* context = get_current_io_context(kernel);
568 	MutexLocker locker(context->io_mutex);
569 
570 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
571 	if (descriptor == NULL)
572 		return B_FILE_ERROR;
573 
574 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
575 
576 	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
577 		// if the I/O subsystem doesn't support select(), we will
578 		// immediately notify the select call
579 		return notify_select_events(info, eventsToSelect);
580 	}
581 
582 	// We need the FD to stay open while we're doing this, so that select()/
583 	// deselect() won't be called on it after it has been closed.
584 	atomic_add(&descriptor->open_count, 1);
585 
586 	locker.Unlock();
587 
588 	// select any events asked for
589 	uint32 selectedEvents = 0;
590 
591 	for (uint16 event = 1; event < 16; event++) {
592 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
593 			&& descriptor->ops->fd_select(descriptor, event,
594 				(selectsync*)info) == B_OK) {
595 			selectedEvents |= SELECT_FLAG(event);
596 		}
597 	}
598 	info->selected_events = selectedEvents
599 		| (info->selected_events & B_EVENT_INVALID);
600 
601 	// Add the info to the IO context. Even if nothing has been selected -- we
602 	// always support B_EVENT_INVALID.
603 	locker.Lock();
604 	if (context->fds[fd] != descriptor) {
605 		// Someone close()d the index in the meantime. deselect() all
606 		// events.
607 		info->next = NULL;
608 		deselect_select_infos(descriptor, info);
609 
610 		// Release our open reference of the descriptor.
611 		close_fd(descriptor);
612 		return B_FILE_ERROR;
613 	}
614 
615 	// The FD index hasn't changed, so we add the select info to the table.
616 
617 	info->next = context->select_infos[fd];
618 	context->select_infos[fd] = info;
619 
620 	// As long as the info is in the list, we keep a reference to the sync
621 	// object.
622 	atomic_add(&info->sync->ref_count, 1);
623 
624 	// Finally release our open reference. It is safe just to decrement,
625 	// since as long as the descriptor is associated with the slot,
626 	// someone else still has it open.
627 	atomic_add(&descriptor->open_count, -1);
628 
629 	return B_OK;
630 }
631 
632 
633 status_t
634 deselect_fd(int32 fd, struct select_info* info, bool kernel)
635 {
636 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
637 		info->sync, info->selected_events));
638 
639 	FDGetter fdGetter;
640 		// define before the context locker, so it will be destroyed after it
641 
642 	io_context* context = get_current_io_context(kernel);
643 	MutexLocker locker(context->io_mutex);
644 
645 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
646 	if (descriptor == NULL)
647 		return B_FILE_ERROR;
648 
649 	// remove the info from the IO context
650 
651 	select_info** infoLocation = &context->select_infos[fd];
652 	while (*infoLocation != NULL && *infoLocation != info)
653 		infoLocation = &(*infoLocation)->next;
654 
655 	// If not found, someone else beat us to it.
656 	if (*infoLocation != info)
657 		return B_OK;
658 
659 	*infoLocation = info->next;
660 
661 	locker.Unlock();
662 
663 	// deselect the selected events
664 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
665 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
666 		for (uint16 event = 1; event < 16; event++) {
667 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
668 				descriptor->ops->fd_deselect(descriptor, event,
669 					(selectsync*)info);
670 			}
671 		}
672 	}
673 
674 	put_select_sync(info->sync);
675 
676 	return B_OK;
677 }
678 
679 
680 /*!	This function checks if the specified fd is valid in the current
681 	context. It can be used for a quick check; the fd is not locked
682 	so it could become invalid immediately after this check.
683 */
684 bool
685 fd_is_valid(int fd, bool kernel)
686 {
687 	struct file_descriptor* descriptor
688 		= get_fd(get_current_io_context(kernel), fd);
689 	if (descriptor == NULL)
690 		return false;
691 
692 	put_fd(descriptor);
693 	return true;
694 }
695 
696 
697 struct vnode*
698 fd_vnode(struct file_descriptor* descriptor)
699 {
700 	switch (descriptor->type) {
701 		case FDTYPE_FILE:
702 		case FDTYPE_DIR:
703 		case FDTYPE_ATTR_DIR:
704 		case FDTYPE_ATTR:
705 			return descriptor->u.vnode;
706 	}
707 
708 	return NULL;
709 }
710 
711 
712 static status_t
713 common_close(int fd, bool kernel)
714 {
715 	return close_fd_index(get_current_io_context(kernel), fd);
716 }
717 
718 
719 static ssize_t
720 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
721 {
722 	if (!IS_USER_ADDRESS(buffer))
723 		return B_BAD_ADDRESS;
724 
725 	if (pos < -1)
726 		return B_BAD_VALUE;
727 
728 	FDGetter fdGetter;
729 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
730 	if (!descriptor)
731 		return B_FILE_ERROR;
732 
733 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
734 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
735 		return B_FILE_ERROR;
736 	}
737 
738 	bool movePosition = false;
739 	if (pos == -1) {
740 		pos = descriptor->pos;
741 		movePosition = true;
742 	}
743 
744 	if (write ? descriptor->ops->fd_write == NULL
745 			: descriptor->ops->fd_read == NULL) {
746 		return B_BAD_VALUE;
747 	}
748 
749 	SyscallRestartWrapper<status_t> status;
750 
751 	if (write)
752 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
753 	else
754 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
755 
756 	if (status != B_OK)
757 		return status;
758 
759 	if (movePosition)
760 		descriptor->pos = pos + length;
761 
762 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
763 }
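// For both common_user_io() above and common_user_vector_io() below, passing
// pos == -1 means "use the descriptor's current position and advance it by
// the number of bytes transferred"; any other negative position is rejected
// with B_BAD_VALUE.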
764 
765 
766 static ssize_t
767 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
768 	bool write)
769 {
770 	if (!IS_USER_ADDRESS(userVecs))
771 		return B_BAD_ADDRESS;
772 
773 	if (pos < -1)
774 		return B_BAD_VALUE;
775 
776 	// prevent integer overflow exploit in malloc()
777 	if (count > IOV_MAX)
778 		return B_BAD_VALUE;
779 
780 	FDGetter fdGetter;
781 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
782 	if (!descriptor)
783 		return B_FILE_ERROR;
784 
785 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
786 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
787 		return B_FILE_ERROR;
788 	}
789 
790 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
791 	if (vecs == NULL)
792 		return B_NO_MEMORY;
793 	MemoryDeleter _(vecs);
794 
795 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
796 		return B_BAD_ADDRESS;
797 
798 	bool movePosition = false;
799 	if (pos == -1) {
800 		pos = descriptor->pos;
801 		movePosition = true;
802 	}
803 
804 	if (write ? descriptor->ops->fd_write == NULL
805 			: descriptor->ops->fd_read == NULL) {
806 		return B_BAD_VALUE;
807 	}
808 
809 	SyscallRestartWrapper<status_t> status;
810 
811 	ssize_t bytesTransferred = 0;
812 	for (uint32 i = 0; i < count; i++) {
813 		size_t length = vecs[i].iov_len;
814 		if (write) {
815 			status = descriptor->ops->fd_write(descriptor, pos,
816 				vecs[i].iov_base, &length);
817 		} else {
818 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
819 				&length);
820 		}
821 
822 		if (status != B_OK) {
823 			if (bytesTransferred == 0)
824 				return status;
825 			status = B_OK;
826 			break;
827 		}
828 
829 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
830 			bytesTransferred = SSIZE_MAX;
831 		else
832 			bytesTransferred += (ssize_t)length;
833 
834 		pos += length;
835 
836 		if (length < vecs[i].iov_len)
837 			break;
838 	}
839 
840 	if (movePosition)
841 		descriptor->pos = pos;
842 
843 	return bytesTransferred;
844 }
845 
846 
847 status_t
848 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
849 {
850 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
851 
852 	return fd_ioctl(false, fd, op, buffer, length);
853 }
854 
855 
856 //	#pragma mark - User syscalls
857 
858 
859 ssize_t
860 _user_read(int fd, off_t pos, void* buffer, size_t length)
861 {
862 	return common_user_io(fd, pos, buffer, length, false);
863 }
864 
865 
866 ssize_t
867 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
868 {
869 	return common_user_vector_io(fd, pos, userVecs, count, false);
870 }
871 
872 
873 ssize_t
874 _user_write(int fd, off_t pos, const void* buffer, size_t length)
875 {
876 	return common_user_io(fd, pos, (void*)buffer, length, true);
877 }
878 
879 
880 ssize_t
881 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
882 {
883 	return common_user_vector_io(fd, pos, userVecs, count, true);
884 }
885 
886 
887 off_t
888 _user_seek(int fd, off_t pos, int seekType)
889 {
890 	syscall_64_bit_return_value();
891 
892 	struct file_descriptor* descriptor;
893 
894 	descriptor = get_fd(get_current_io_context(false), fd);
895 	if (!descriptor)
896 		return B_FILE_ERROR;
897 
898 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
899 
900 	if (descriptor->ops->fd_seek)
901 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
902 	else
903 		pos = ESPIPE;
904 
905 	put_fd(descriptor);
906 	return pos;
907 }
908 
909 
910 status_t
911 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
912 {
913 	if (!IS_USER_ADDRESS(buffer))
914 		return B_BAD_ADDRESS;
915 
916 	TRACE(("user_ioctl: fd %d\n", fd));
917 
918 	SyscallRestartWrapper<status_t> status;
919 
920 	return status = fd_ioctl(false, fd, op, buffer, length);
921 }
922 
923 
924 ssize_t
925 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
926 	uint32 maxCount)
927 {
928 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
929 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
930 
931 	if (maxCount == 0)
932 		return 0;
933 
934 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
935 		return B_BAD_ADDRESS;
936 
937 	// get I/O context and FD
938 	io_context* ioContext = get_current_io_context(false);
939 	FDGetter fdGetter;
940 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
941 	if (descriptor == NULL)
942 		return B_FILE_ERROR;
943 
944 	if (descriptor->ops->fd_read_dir == NULL)
945 		return B_UNSUPPORTED;
946 
947 	// restrict buffer size and allocate a heap buffer
948 	if (bufferSize > kMaxReadDirBufferSize)
949 		bufferSize = kMaxReadDirBufferSize;
950 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
951 	if (buffer == NULL)
952 		return B_NO_MEMORY;
953 	MemoryDeleter bufferDeleter(buffer);
954 
955 	// read the directory
956 	uint32 count = maxCount;
957 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
958 		buffer, bufferSize, &count);
959 	if (status != B_OK)
960 		return status;
961 
962 	// copy the buffer back -- determine the total buffer size first
963 	size_t sizeToCopy = 0;
964 	struct dirent* entry = buffer;
965 	for (uint32 i = 0; i < count; i++) {
966 		size_t length = entry->d_reclen;
967 		sizeToCopy += length;
968 		entry = (struct dirent*)((uint8*)entry + length);
969 	}
970 
971 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
972 		return B_BAD_ADDRESS;
973 
974 	return count;
975 }
976 
977 
978 status_t
979 _user_rewind_dir(int fd)
980 {
981 	struct file_descriptor* descriptor;
982 	status_t status;
983 
984 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
985 
986 	descriptor = get_fd(get_current_io_context(false), fd);
987 	if (descriptor == NULL)
988 		return B_FILE_ERROR;
989 
990 	if (descriptor->ops->fd_rewind_dir)
991 		status = descriptor->ops->fd_rewind_dir(descriptor);
992 	else
993 		status = B_NOT_SUPPORTED;
994 
995 	put_fd(descriptor);
996 	return status;
997 }
998 
999 
1000 status_t
1001 _user_close(int fd)
1002 {
1003 	return common_close(fd, false);
1004 }
1005 
1006 
1007 int
1008 _user_dup(int fd)
1009 {
1010 	return dup_fd(fd, false);
1011 }
1012 
1013 
1014 int
1015 _user_dup2(int ofd, int nfd)
1016 {
1017 	return dup2_fd(ofd, nfd, false);
1018 }
1019 
1020 
1021 //	#pragma mark - Kernel calls
1022 
1023 
1024 ssize_t
1025 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1026 {
1027 	if (pos < -1)
1028 		return B_BAD_VALUE;
1029 
1030 	FDGetter fdGetter;
1031 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1032 
1033 	if (!descriptor)
1034 		return B_FILE_ERROR;
1035 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1036 		return B_FILE_ERROR;
1037 
1038 	bool movePosition = false;
1039 	if (pos == -1) {
1040 		pos = descriptor->pos;
1041 		movePosition = true;
1042 	}
1043 
1044 	SyscallFlagUnsetter _;
1045 
1046 	if (descriptor->ops->fd_read == NULL)
1047 		return B_BAD_VALUE;
1048 
1049 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1050 		&length);
1051 	if (bytesRead >= B_OK) {
1052 		if (length > SSIZE_MAX)
1053 			bytesRead = SSIZE_MAX;
1054 		else
1055 			bytesRead = (ssize_t)length;
1056 
1057 		if (movePosition)
1058 			descriptor->pos = pos + length;
1059 	}
1060 
1061 	return bytesRead;
1062 }
1063 
1064 
1065 ssize_t
1066 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1067 {
1068 	bool movePosition = false;
1069 	status_t status;
1070 	uint32 i;
1071 
1072 	if (pos < -1)
1073 		return B_BAD_VALUE;
1074 
1075 	FDGetter fdGetter;
1076 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1077 
1078 	if (!descriptor)
1079 		return B_FILE_ERROR;
1080 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1081 		return B_FILE_ERROR;
1082 
1083 	if (pos == -1) {
1084 		pos = descriptor->pos;
1085 		movePosition = true;
1086 	}
1087 
1088 	if (descriptor->ops->fd_read == NULL)
1089 		return B_BAD_VALUE;
1090 
1091 	SyscallFlagUnsetter _;
1092 
1093 	ssize_t bytesRead = 0;
1094 
1095 	for (i = 0; i < count; i++) {
1096 		size_t length = vecs[i].iov_len;
1097 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1098 			&length);
1099 		if (status != B_OK) {
1100 			bytesRead = status;
1101 			break;
1102 		}
1103 
1104 		if ((uint64)bytesRead + length > SSIZE_MAX)
1105 			bytesRead = SSIZE_MAX;
1106 		else
1107 			bytesRead += (ssize_t)length;
1108 
1109 		pos += vecs[i].iov_len;
1110 	}
1111 
1112 	if (movePosition)
1113 		descriptor->pos = pos;
1114 
1115 	return bytesRead;
1116 }
1117 
1118 
1119 ssize_t
1120 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1121 {
1122 	if (pos < -1)
1123 		return B_BAD_VALUE;
1124 
1125 	FDGetter fdGetter;
1126 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1127 
1128 	if (descriptor == NULL)
1129 		return B_FILE_ERROR;
1130 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1131 		return B_FILE_ERROR;
1132 
1133 	bool movePosition = false;
1134 	if (pos == -1) {
1135 		pos = descriptor->pos;
1136 		movePosition = true;
1137 	}
1138 
1139 	if (descriptor->ops->fd_write == NULL)
1140 		return B_BAD_VALUE;
1141 
1142 	SyscallFlagUnsetter _;
1143 
1144 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1145 		&length);
1146 	if (bytesWritten >= B_OK) {
1147 		if (length > SSIZE_MAX)
1148 			bytesWritten = SSIZE_MAX;
1149 		else
1150 			bytesWritten = (ssize_t)length;
1151 
1152 		if (movePosition)
1153 			descriptor->pos = pos + length;
1154 	}
1155 
1156 	return bytesWritten;
1157 }
1158 
1159 
1160 ssize_t
1161 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1162 {
1163 	bool movePosition = false;
1164 	status_t status;
1165 	uint32 i;
1166 
1167 	if (pos < -1)
1168 		return B_BAD_VALUE;
1169 
1170 	FDGetter fdGetter;
1171 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1172 
1173 	if (!descriptor)
1174 		return B_FILE_ERROR;
1175 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1176 		return B_FILE_ERROR;
1177 
1178 	if (pos == -1) {
1179 		pos = descriptor->pos;
1180 		movePosition = true;
1181 	}
1182 
1183 	if (descriptor->ops->fd_write == NULL)
1184 		return B_BAD_VALUE;
1185 
1186 	SyscallFlagUnsetter _;
1187 
1188 	ssize_t bytesWritten = 0;
1189 
1190 	for (i = 0; i < count; i++) {
1191 		size_t length = vecs[i].iov_len;
1192 		status = descriptor->ops->fd_write(descriptor, pos,
1193 			vecs[i].iov_base, &length);
1194 		if (status != B_OK) {
1195 			bytesWritten = status;
1196 			break;
1197 		}
1198 
1199 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1200 			bytesWritten = SSIZE_MAX;
1201 		else
1202 			bytesWritten += (ssize_t)length;
1203 
1204 		pos += vecs[i].iov_len;
1205 	}
1206 
1207 	if (movePosition)
1208 		descriptor->pos = pos;
1209 
1210 	return bytesWritten;
1211 }
1212 
1213 
1214 off_t
1215 _kern_seek(int fd, off_t pos, int seekType)
1216 {
1217 	struct file_descriptor* descriptor;
1218 
1219 	descriptor = get_fd(get_current_io_context(true), fd);
1220 	if (!descriptor)
1221 		return B_FILE_ERROR;
1222 
1223 	if (descriptor->ops->fd_seek)
1224 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1225 	else
1226 		pos = ESPIPE;
1227 
1228 	put_fd(descriptor);
1229 	return pos;
1230 }
1231 
1232 
1233 status_t
1234 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1235 {
1236 	TRACE(("kern_ioctl: fd %d\n", fd));
1237 
1238 	SyscallFlagUnsetter _;
1239 
1240 	return fd_ioctl(true, fd, op, buffer, length);
1241 }
1242 
1243 
1244 ssize_t
1245 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1246 	uint32 maxCount)
1247 {
1248 	struct file_descriptor* descriptor;
1249 	ssize_t retval;
1250 
1251 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1252 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1253 
1254 	struct io_context* ioContext = get_current_io_context(true);
1255 	descriptor = get_fd(ioContext, fd);
1256 	if (descriptor == NULL)
1257 		return B_FILE_ERROR;
1258 
1259 	if (descriptor->ops->fd_read_dir) {
1260 		uint32 count = maxCount;
1261 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1262 			bufferSize, &count);
1263 		if (retval >= 0)
1264 			retval = count;
1265 	} else
1266 		retval = B_NOT_SUPPORTED;
1267 
1268 	put_fd(descriptor);
1269 	return retval;
1270 }
1271 
1272 
1273 status_t
1274 _kern_rewind_dir(int fd)
1275 {
1276 	struct file_descriptor* descriptor;
1277 	status_t status;
1278 
1279 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1280 
1281 	descriptor = get_fd(get_current_io_context(true), fd);
1282 	if (descriptor == NULL)
1283 		return B_FILE_ERROR;
1284 
1285 	if (descriptor->ops->fd_rewind_dir)
1286 		status = descriptor->ops->fd_rewind_dir(descriptor);
1287 	else
1288 		status = B_NOT_SUPPORTED;
1289 
1290 	put_fd(descriptor);
1291 	return status;
1292 }
1293 
1294 
1295 status_t
1296 _kern_close(int fd)
1297 {
1298 	return common_close(fd, true);
1299 }
1300 
1301 
1302 int
1303 _kern_dup(int fd)
1304 {
1305 	return dup_fd(fd, true);
1306 }
1307 
1308 
1309 int
1310 _kern_dup2(int ofd, int nfd)
1311 {
1312 	return dup2_fd(ofd, nfd, true);
1313 }
1314 
1315