xref: /haiku/src/system/kernel/fs/fd.cpp (revision 820dca4df6c7bf955c46e8f6521b9408f50b2900)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include <syscalls.h>
21 #include <syscall_restart.h>
22 #include <util/AutoLock.h>
23 #include <vfs.h>
24 #include <wait_for_objects.h>
25 
26 #include "vfs_tracing.h"
27 
28 
29 //#define TRACE_FD
30 #ifdef TRACE_FD
31 #	define TRACE(x) dprintf x
32 #else
33 #	define TRACE(x)
34 #endif
35 
36 
37 static const size_t kMaxReadDirBufferSize = 64 * 1024;
38 
39 
40 static struct file_descriptor* get_fd_locked(struct io_context* context,
41 	int fd);
42 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
43 static void deselect_select_infos(file_descriptor* descriptor,
44 	select_info* infos);
45 
46 
47 struct FDGetterLocking {
48 	inline bool Lock(file_descriptor* /*lockable*/)
49 	{
50 		return false;
51 	}
52 
53 	inline void Unlock(file_descriptor* lockable)
54 	{
55 		put_fd(lockable);
56 	}
57 };
58 
59 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
60 public:
61 	inline FDGetter()
62 		: AutoLocker<file_descriptor, FDGetterLocking>()
63 	{
64 	}
65 
66 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
67 		: AutoLocker<file_descriptor, FDGetterLocking>(
68 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
69 	{
70 	}
71 
72 	inline file_descriptor* SetTo(io_context* context, int fd,
73 		bool contextLocked = false)
74 	{
75 		file_descriptor* descriptor
76 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
77 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
78 		return descriptor;
79 	}
80 
81 	inline file_descriptor* SetTo(int fd, bool kernel,
82 		bool contextLocked = false)
83 	{
84 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
85 	}
86 
87 	inline file_descriptor* FD() const
88 	{
89 		return fLockable;
90 	}
91 };
92 
93 
94 //	#pragma mark - General fd routines
95 
96 
97 #ifdef DEBUG
98 void dump_fd(int fd, struct file_descriptor* descriptor);
99 
100 void
101 dump_fd(int fd, struct file_descriptor* descriptor)
102 {
103 	dprintf("fd[%d] = %p: type = %ld, ref_count = %ld, ops = %p, u.vnode = %p, "
104 		"u.mount = %p, cookie = %p, open_mode = %lx, pos = %Ld\n",
105 		fd, descriptor, descriptor->type, descriptor->ref_count,
106 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
107 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
108 }
109 #endif
110 
111 
112 /*! Allocates and initializes a new file_descriptor.
113 */
114 struct file_descriptor*
115 alloc_fd(void)
116 {
117 	file_descriptor* descriptor
118 		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
119 	if (descriptor == NULL)
120 		return NULL;
121 
122 	descriptor->u.vnode = NULL;
123 	descriptor->cookie = NULL;
124 	descriptor->ref_count = 1;
125 	descriptor->open_count = 0;
126 	descriptor->open_mode = 0;
127 	descriptor->pos = 0;
128 
129 	return descriptor;
130 }
131 
132 
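/*!	Returns whether the close-on-exec flag is set for the given FD in the
	provided I/O context. The flags are kept as a bitmap in
	\c fds_close_on_exec, one bit per FD.
*/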
133 bool
134 fd_close_on_exec(struct io_context* context, int fd)
135 {
136 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
137 }
138 
139 
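/*!	Sets or clears the close-on-exec flag for the given FD in the provided
	I/O context's bitmap.
*/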
140 void
141 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
142 {
143 	if (closeFD)
144 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
145 	else
146 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
147 }
148 
149 
150 /*!	Searches for a free slot in the FD table of the provided I/O context,
151 	and inserts the specified descriptor into it.
152 */
153 int
154 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
155 	int firstIndex)
156 {
157 	int fd = -1;
158 	uint32 i;
159 
160 	mutex_lock(&context->io_mutex);
161 
162 	for (i = firstIndex; i < context->table_size; i++) {
163 		if (!context->fds[i]) {
164 			fd = i;
165 			break;
166 		}
167 	}
168 	if (fd < 0) {
169 		fd = B_NO_MORE_FDS;
170 		goto err;
171 	}
172 
173 	TFD(NewFD(context, fd, descriptor));
174 
175 	context->fds[fd] = descriptor;
176 	context->num_used_fds++;
177 	atomic_add(&descriptor->open_count, 1);
178 
179 err:
180 	mutex_unlock(&context->io_mutex);
181 
182 	return fd;
183 }
184 
185 
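/*!	Equivalent to new_fd_etc() with a first index of 0.
*/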
186 int
187 new_fd(struct io_context* context, struct file_descriptor* descriptor)
188 {
189 	return new_fd_etc(context, descriptor, 0);
190 }
191 
192 
193 /*!	Decrements the descriptor's reference count, and frees all of its
194 	resources once it is no longer in use.
195 */
196 void
197 put_fd(struct file_descriptor* descriptor)
198 {
199 	int32 previous = atomic_add(&descriptor->ref_count, -1);
200 
201 	TFD(PutFD(descriptor));
202 
203 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
204 		descriptor, descriptor->ref_count, descriptor->cookie));
205 
206 	// free the descriptor if we don't need it anymore
207 	if (previous == 1) {
208 		// free the underlying object
209 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
210 			descriptor->ops->fd_free(descriptor);
211 
212 		free(descriptor);
213 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
214 		&& previous - 1 == descriptor->open_count
215 		&& descriptor->ops != NULL) {
216 		// The descriptor has been disconnected, so it cannot be
217 		// accessed anymore. Since no one is currently using it,
218 		// close it now.
219 
220 		if (descriptor->ops->fd_close)
221 			descriptor->ops->fd_close(descriptor);
222 		if (descriptor->ops->fd_free)
223 			descriptor->ops->fd_free(descriptor);
224 
225 		// prevent this descriptor from being closed/freed again
226 		descriptor->open_count = -1;
227 		descriptor->ref_count = -1;
228 		descriptor->ops = NULL;
229 		descriptor->u.vnode = NULL;
230 
231 		// the file descriptor is kept intact, so that it's not
232 		// reused until someone explicitly closes it
233 	}
234 }
235 
236 
237 /*!	Decrements the open counter of the file descriptor and invokes
238 	its close hook when appropriate.
239 */
240 void
241 close_fd(struct file_descriptor* descriptor)
242 {
243 	if (atomic_add(&descriptor->open_count, -1) == 1) {
244 		vfs_unlock_vnode_if_locked(descriptor);
245 
246 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
247 			descriptor->ops->fd_close(descriptor);
248 	}
249 }
250 
251 
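/*!	Removes the descriptor at the given index from the I/O context's FD
	table, closes it, and releases the reference held by the table slot.
	Returns \c B_FILE_ERROR if there was no (connected) descriptor at that
	index.
*/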
252 status_t
253 close_fd_index(struct io_context* context, int fd)
254 {
255 	struct file_descriptor* descriptor = remove_fd(context, fd);
256 
257 	if (descriptor == NULL)
258 		return B_FILE_ERROR;
259 
260 	close_fd(descriptor);
261 	put_fd(descriptor);
262 		// the reference associated with the slot
263 
264 	return B_OK;
265 }
266 
267 
268 /*!	Marks the descriptor as disconnected: its underlying object will be
269 	closed and freed as soon as possible (in one of the next calls to
270 	put_fd()), and get_fd() will no longer succeed on this descriptor.
271 	This is useful if the underlying object is gone, for instance when a
272 	(mounted) volume was removed unexpectedly.
273 */
274 void
275 disconnect_fd(struct file_descriptor* descriptor)
276 {
277 	descriptor->open_mode |= O_DISCONNECTED;
278 }
279 
280 
281 void
282 inc_fd_ref_count(struct file_descriptor* descriptor)
283 {
284 	atomic_add(&descriptor->ref_count, 1);
285 }
286 
287 
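/*!	Returns the descriptor for the given FD with an additional reference
	acquired, or \c NULL if the FD is out of range, unused, or disconnected.
	The I/O context's \c io_mutex must be held by the caller.
*/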
288 static struct file_descriptor*
289 get_fd_locked(struct io_context* context, int fd)
290 {
291 	if (fd < 0 || (uint32)fd >= context->table_size)
292 		return NULL;
293 
294 	struct file_descriptor* descriptor = context->fds[fd];
295 
296 	if (descriptor != NULL) {
297 		// Disconnected descriptors cannot be accessed anymore
298 		if (descriptor->open_mode & O_DISCONNECTED)
299 			descriptor = NULL;
300 		else {
301 			TFD(GetFD(context, fd, descriptor));
302 			inc_fd_ref_count(descriptor);
303 		}
304 	}
305 
306 	return descriptor;
307 }
308 
309 
310 struct file_descriptor*
311 get_fd(struct io_context* context, int fd)
312 {
313 	MutexLocker _(context->io_mutex);
314 
315 	return get_fd_locked(context, fd);
316 }
317 
318 
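/*!	Like get_fd(), but additionally acquires an "open" reference, i.e. it
	also increments the descriptor's open_count.
*/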
319 struct file_descriptor*
320 get_open_fd(struct io_context* context, int fd)
321 {
322 	MutexLocker _(context->io_mutex);
323 
324 	file_descriptor* descriptor = get_fd_locked(context, fd);
325 	if (descriptor == NULL)
326 		return NULL;
327 
328 	atomic_add(&descriptor->open_count, 1);
329 
330 	return descriptor;
331 }
332 
333 
334 /*!	Removes the file descriptor from the specified slot.
335 */
336 static struct file_descriptor*
337 remove_fd(struct io_context* context, int fd)
338 {
339 	struct file_descriptor* descriptor = NULL;
340 
341 	if (fd < 0)
342 		return NULL;
343 
344 	mutex_lock(&context->io_mutex);
345 
346 	if ((uint32)fd < context->table_size)
347 		descriptor = context->fds[fd];
348 
349 	select_info* selectInfos = NULL;
350 	bool disconnected = false;
351 
352 	if (descriptor != NULL) {
353 		// fd is valid
354 		TFD(RemoveFD(context, fd, descriptor));
355 
356 		context->fds[fd] = NULL;
357 		fd_set_close_on_exec(context, fd, false);
358 		context->num_used_fds--;
359 
360 		selectInfos = context->select_infos[fd];
361 		context->select_infos[fd] = NULL;
362 
363 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
364 	}
365 
366 	mutex_unlock(&context->io_mutex);
367 
368 	if (selectInfos != NULL)
369 		deselect_select_infos(descriptor, selectInfos);
370 
371 	return disconnected ? NULL : descriptor;
372 }
373 
374 
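/*!	Duplicates the given FD into the first free slot of the current team's
	(or the kernel's) FD table and clears the new slot's close-on-exec flag.
	Returns the new FD or an error code.
*/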
375 static int
376 dup_fd(int fd, bool kernel)
377 {
378 	struct io_context* context = get_current_io_context(kernel);
379 	struct file_descriptor* descriptor;
380 	int status;
381 
382 	TRACE(("dup_fd: fd = %d\n", fd));
383 
384 	// Try to get the fd structure
385 	descriptor = get_fd(context, fd);
386 	if (descriptor == NULL)
387 		return B_FILE_ERROR;
388 
389 	// now put the fd in place
390 	status = new_fd(context, descriptor);
391 	if (status < 0)
392 		put_fd(descriptor);
393 	else {
394 		mutex_lock(&context->io_mutex);
395 		fd_set_close_on_exec(context, status, false);
396 		mutex_unlock(&context->io_mutex);
397 	}
398 
399 	return status;
400 }
401 
402 
403 /*!	POSIX says this should be the same as:
404 		close(newfd);
405 		fcntl(oldfd, F_DUPFD, newfd);
406 
407 	We implement dup2() as one atomic operation to be thread-safe.
408 */
409 static int
410 dup2_fd(int oldfd, int newfd, bool kernel)
411 {
412 	struct file_descriptor* evicted = NULL;
413 	struct io_context* context;
414 
415 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
416 
417 	// quick check
418 	if (oldfd < 0 || newfd < 0)
419 		return B_FILE_ERROR;
420 
421 	// Get current I/O context and lock it
422 	context = get_current_io_context(kernel);
423 	mutex_lock(&context->io_mutex);
424 
425 	// Check if the fds are valid (mutex must be locked because
426 	// the table size could be changed)
427 	if ((uint32)oldfd >= context->table_size
428 		|| (uint32)newfd >= context->table_size
429 		|| context->fds[oldfd] == NULL) {
430 		mutex_unlock(&context->io_mutex);
431 		return B_FILE_ERROR;
432 	}
433 
434 	// Check for identity. Note that this check cannot be done earlier,
435 	// because we always want to return an error on invalid
436 	// handles.
437 	select_info* selectInfos = NULL;
438 	if (oldfd != newfd) {
439 		// Now do the work
440 		TFD(Dup2FD(context, oldfd, newfd));
441 
442 		evicted = context->fds[newfd];
443 		selectInfos = context->select_infos[newfd];
444 		context->select_infos[newfd] = NULL;
445 		atomic_add(&context->fds[oldfd]->ref_count, 1);
446 		atomic_add(&context->fds[oldfd]->open_count, 1);
447 		context->fds[newfd] = context->fds[oldfd];
448 
449 		if (evicted == NULL)
450 			context->num_used_fds++;
451 	}
452 
453 	fd_set_close_on_exec(context, newfd, false);
454 
455 	mutex_unlock(&context->io_mutex);
456 
457 	// Say bye bye to the evicted fd
458 	if (evicted) {
459 		deselect_select_infos(evicted, selectInfos);
460 		close_fd(evicted);
461 		put_fd(evicted);
462 	}
463 
464 	return newfd;
465 }
466 
467 
468 /*!	Duplicates an FD from another team to this/the kernel team.
469 	\param fromTeam The team which owns the FD.
470 	\param fd The FD to duplicate.
471 	\param kernel If \c true, the new FD will be created in the kernel team,
472 			the current userland team otherwise.
473 	\return The newly created FD or an error code, if something went wrong.
474 */
475 int
476 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
477 {
478 	// get the I/O context for the team in question
479 	Team* team = Team::Get(fromTeam);
480 	if (team == NULL)
481 		return B_BAD_TEAM_ID;
482 	BReference<Team> teamReference(team, true);
483 
484 	io_context* fromContext = team->io_context;
485 
486 	// get the file descriptor
487 	file_descriptor* descriptor = get_fd(fromContext, fd);
488 	if (descriptor == NULL)
489 		return B_FILE_ERROR;
490 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
491 
492 	// create a new FD in the target I/O context
493 	int result = new_fd(get_current_io_context(kernel), descriptor);
494 	if (result >= 0) {
495 		// the descriptor reference now belongs to the slot
496 		descriptorPutter.Detach();
497 	}
498 
499 	return result;
500 }
501 
502 
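/*!	Common backend for the ioctl() calls: forwards the request to the
	descriptor's fd_ioctl hook. \c B_DEV_INVALID_IOCTL is translated to
	\c ENOTTY.
*/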
503 static status_t
504 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
505 {
506 	struct file_descriptor* descriptor;
507 	int status;
508 
509 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
510 	if (descriptor == NULL)
511 		return B_FILE_ERROR;
512 
513 	if (descriptor->ops->fd_ioctl)
514 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
515 	else
516 		status = B_DEV_INVALID_IOCTL;
517 
518 	if (status == B_DEV_INVALID_IOCTL)
519 		status = ENOTTY;
520 
521 	put_fd(descriptor);
522 	return status;
523 }
524 
525 
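/*!	Deselects all events of the given select infos on the descriptor,
	notifies the waiting callers with \c B_EVENT_INVALID, and releases the
	references to their sync objects.
*/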
526 static void
527 deselect_select_infos(file_descriptor* descriptor, select_info* infos)
528 {
529 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
530 
531 	select_info* info = infos;
532 	while (info != NULL) {
533 		select_sync* sync = info->sync;
534 
535 		// deselect the selected events
536 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
537 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
538 			for (uint16 event = 1; event < 16; event++) {
539 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
540 					descriptor->ops->fd_deselect(descriptor, event,
541 						(selectsync*)info);
542 				}
543 			}
544 		}
545 
546 		notify_select_events(info, B_EVENT_INVALID);
547 		info = info->next;
548 		put_select_sync(sync);
549 	}
550 }
551 
552 
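/*!	Selects the events requested in the given select_info on the FD and, on
	success, adds the info to the I/O context's select_infos list for that
	FD, so the events can be deselected again when the FD is closed or the
	caller stops waiting.
*/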
553 status_t
554 select_fd(int32 fd, struct select_info* info, bool kernel)
555 {
556 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
557 		info->sync, info->selected_events));
558 
559 	FDGetter fdGetter;
560 		// define before the context locker, so it will be destroyed after it
561 
562 	io_context* context = get_current_io_context(kernel);
563 	MutexLocker locker(context->io_mutex);
564 
565 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
566 	if (descriptor == NULL)
567 		return B_FILE_ERROR;
568 
569 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
570 
571 	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
572 		// if the I/O subsystem doesn't support select(), we will
573 		// immediately notify the select call
574 		return notify_select_events(info, eventsToSelect);
575 	}
576 
577 	// We need the FD to stay open while we're doing this, so that
578 	// select()/deselect() will not be called on it after it has been closed.
579 	atomic_add(&descriptor->open_count, 1);
580 
581 	locker.Unlock();
582 
583 	// select any events asked for
584 	uint32 selectedEvents = 0;
585 
586 	for (uint16 event = 1; event < 16; event++) {
587 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
588 			&& descriptor->ops->fd_select(descriptor, event,
589 				(selectsync*)info) == B_OK) {
590 			selectedEvents |= SELECT_FLAG(event);
591 		}
592 	}
593 	info->selected_events = selectedEvents
594 		| (info->selected_events & B_EVENT_INVALID);
595 
596 	// Add the info to the IO context. Even if nothing has been selected -- we
597 	// always support B_EVENT_INVALID.
598 	locker.Lock();
599 	if (context->fds[fd] != descriptor) {
600 		// Someone close()d the index in the meantime. deselect() all
601 		// events.
602 		info->next = NULL;
603 		deselect_select_infos(descriptor, info);
604 
605 		// Release our open reference of the descriptor.
606 		close_fd(descriptor);
607 		return B_FILE_ERROR;
608 	}
609 
610 	// The FD index hasn't changed, so we add the select info to the table.
611 
612 	info->next = context->select_infos[fd];
613 	context->select_infos[fd] = info;
614 
615 	// As long as the info is in the list, we keep a reference to the sync
616 	// object.
617 	atomic_add(&info->sync->ref_count, 1);
618 
619 	// Finally release our open reference. It is safe just to decrement,
620 	// since as long as the descriptor is associated with the slot,
621 	// someone else still has it open.
622 	atomic_add(&descriptor->open_count, -1);
623 
624 	return B_OK;
625 }
626 
627 
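/*!	Counterpart of select_fd(): removes the given select_info from the I/O
	context's list for the FD (if it is still there), deselects the
	previously selected events, and releases the reference to the sync
	object.
*/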
628 status_t
629 deselect_fd(int32 fd, struct select_info* info, bool kernel)
630 {
631 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
632 		info->sync, info->selected_events));
633 
634 	FDGetter fdGetter;
635 		// define before the context locker, so it will be destroyed after it
636 
637 	io_context* context = get_current_io_context(kernel);
638 	MutexLocker locker(context->io_mutex);
639 
640 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
641 	if (descriptor == NULL)
642 		return B_FILE_ERROR;
643 
644 	// remove the info from the IO context
645 
646 	select_info** infoLocation = &context->select_infos[fd];
647 	while (*infoLocation != NULL && *infoLocation != info)
648 		infoLocation = &(*infoLocation)->next;
649 
650 	// If not found, someone else beat us to it.
651 	if (*infoLocation != info)
652 		return B_OK;
653 
654 	*infoLocation = info->next;
655 
656 	locker.Unlock();
657 
658 	// deselect the selected events
659 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
660 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
661 		for (uint16 event = 1; event < 16; event++) {
662 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
663 				descriptor->ops->fd_deselect(descriptor, event,
664 					(selectsync*)info);
665 			}
666 		}
667 	}
668 
669 	put_select_sync(info->sync);
670 
671 	return B_OK;
672 }
673 
674 
675 /*!	This function checks if the specified fd is valid in the current
676 	context. It can be used for a quick check; the fd is not locked
677 	so it could become invalid immediately after this check.
678 */
679 bool
680 fd_is_valid(int fd, bool kernel)
681 {
682 	struct file_descriptor* descriptor
683 		= get_fd(get_current_io_context(kernel), fd);
684 	if (descriptor == NULL)
685 		return false;
686 
687 	put_fd(descriptor);
688 	return true;
689 }
690 
691 
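/*!	Returns the vnode associated with the given descriptor, if its type has
	one (files, directories, attributes, and attribute directories), or
	\c NULL otherwise.
*/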
692 struct vnode*
693 fd_vnode(struct file_descriptor* descriptor)
694 {
695 	switch (descriptor->type) {
696 		case FDTYPE_FILE:
697 		case FDTYPE_DIR:
698 		case FDTYPE_ATTR_DIR:
699 		case FDTYPE_ATTR:
700 			return descriptor->u.vnode;
701 	}
702 
703 	return NULL;
704 }
705 
706 
707 static status_t
708 common_close(int fd, bool kernel)
709 {
710 	return close_fd_index(get_current_io_context(kernel), fd);
711 }
712 
713 
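/*!	Common backend for _user_read() and _user_write(): validates the user
	buffer and the descriptor's open mode, calls the respective fd hook,
	and advances the descriptor's position if \a pos is -1.
*/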
714 static ssize_t
715 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
716 {
717 	if (!IS_USER_ADDRESS(buffer))
718 		return B_BAD_ADDRESS;
719 
720 	if (pos < -1)
721 		return B_BAD_VALUE;
722 
723 	FDGetter fdGetter;
724 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
725 	if (!descriptor)
726 		return B_FILE_ERROR;
727 
728 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
729 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
730 		return B_FILE_ERROR;
731 	}
732 
733 	bool movePosition = false;
734 	if (pos == -1) {
735 		pos = descriptor->pos;
736 		movePosition = true;
737 	}
738 
739 	if (write ? descriptor->ops->fd_write == NULL
740 			: descriptor->ops->fd_read == NULL) {
741 		return B_BAD_VALUE;
742 	}
743 
744 	SyscallRestartWrapper<status_t> status;
745 
746 	if (write)
747 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
748 	else
749 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
750 
751 	if (status != B_OK)
752 		return status;
753 
754 	if (movePosition)
755 		descriptor->pos = pos + length;
756 
757 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
758 }
759 
760 
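/*!	Vectored counterpart of common_user_io(), backing _user_readv() and
	_user_writev(): copies the iovec array from userland and performs the
	I/O one vector at a time.
*/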
761 static ssize_t
762 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
763 	bool write)
764 {
765 	if (!IS_USER_ADDRESS(userVecs))
766 		return B_BAD_ADDRESS;
767 
768 	if (pos < -1)
769 		return B_BAD_VALUE;
770 
771 	// prevent integer overflow exploit in malloc()
772 	if (count > IOV_MAX)
773 		return B_BAD_VALUE;
774 
775 	FDGetter fdGetter;
776 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
777 	if (!descriptor)
778 		return B_FILE_ERROR;
779 
780 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
781 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
782 		return B_FILE_ERROR;
783 	}
784 
785 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
786 	if (vecs == NULL)
787 		return B_NO_MEMORY;
788 	MemoryDeleter _(vecs);
789 
790 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
791 		return B_BAD_ADDRESS;
792 
793 	bool movePosition = false;
794 	if (pos == -1) {
795 		pos = descriptor->pos;
796 		movePosition = true;
797 	}
798 
799 	if (write ? descriptor->ops->fd_write == NULL
800 			: descriptor->ops->fd_read == NULL) {
801 		return B_BAD_VALUE;
802 	}
803 
804 	SyscallRestartWrapper<status_t> status;
805 
806 	ssize_t bytesTransferred = 0;
807 	for (uint32 i = 0; i < count; i++) {
808 		size_t length = vecs[i].iov_len;
809 		if (write) {
810 			status = descriptor->ops->fd_write(descriptor, pos,
811 				vecs[i].iov_base, &length);
812 		} else {
813 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
814 				&length);
815 		}
816 
817 		if (status != B_OK) {
818 			if (bytesTransferred == 0)
819 				return status;
820 			status = B_OK;
821 			break;
822 		}
823 
824 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
825 			bytesTransferred = SSIZE_MAX;
826 		else
827 			bytesTransferred += (ssize_t)length;
828 
829 		pos += length;
830 
831 		if (length < vecs[i].iov_len)
832 			break;
833 	}
834 
835 	if (movePosition)
836 		descriptor->pos = pos;
837 
838 	return bytesTransferred;
839 }
840 
841 
842 status_t
843 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
844 {
845 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
846 
847 	return fd_ioctl(false, fd, op, buffer, length);
848 }
849 
850 
851 //	#pragma mark - User syscalls
852 
853 
854 ssize_t
855 _user_read(int fd, off_t pos, void* buffer, size_t length)
856 {
857 	return common_user_io(fd, pos, buffer, length, false);
858 }
859 
860 
861 ssize_t
862 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
863 {
864 	return common_user_vector_io(fd, pos, userVecs, count, false);
865 }
866 
867 
868 ssize_t
869 _user_write(int fd, off_t pos, const void* buffer, size_t length)
870 {
871 	return common_user_io(fd, pos, (void*)buffer, length, true);
872 }
873 
874 
875 ssize_t
876 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
877 {
878 	return common_user_vector_io(fd, pos, userVecs, count, true);
879 }
880 
881 
882 off_t
883 _user_seek(int fd, off_t pos, int seekType)
884 {
885 	syscall_64_bit_return_value();
886 
887 	struct file_descriptor* descriptor;
888 
889 	descriptor = get_fd(get_current_io_context(false), fd);
890 	if (!descriptor)
891 		return B_FILE_ERROR;
892 
893 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
894 
895 	if (descriptor->ops->fd_seek)
896 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
897 	else
898 		pos = ESPIPE;
899 
900 	put_fd(descriptor);
901 	return pos;
902 }
903 
904 
905 status_t
906 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
907 {
908 	if (!IS_USER_ADDRESS(buffer))
909 		return B_BAD_ADDRESS;
910 
911 	TRACE(("user_ioctl: fd %d\n", fd));
912 
913 	SyscallRestartWrapper<status_t> status;
914 
915 	return status = fd_ioctl(false, fd, op, buffer, length);
916 }
917 
918 
919 ssize_t
920 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
921 	uint32 maxCount)
922 {
923 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
924 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
925 
926 	if (maxCount == 0)
927 		return 0;
928 
929 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
930 		return B_BAD_ADDRESS;
931 
932 	// get I/O context and FD
933 	io_context* ioContext = get_current_io_context(false);
934 	FDGetter fdGetter;
935 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
936 	if (descriptor == NULL)
937 		return B_FILE_ERROR;
938 
939 	if (descriptor->ops->fd_read_dir == NULL)
940 		return B_UNSUPPORTED;
941 
942 	// restrict buffer size and allocate a heap buffer
943 	if (bufferSize > kMaxReadDirBufferSize)
944 		bufferSize = kMaxReadDirBufferSize;
945 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
946 	if (buffer == NULL)
947 		return B_NO_MEMORY;
948 	MemoryDeleter bufferDeleter(buffer);
949 
950 	// read the directory
951 	uint32 count = maxCount;
952 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
953 		buffer, bufferSize, &count);
954 	if (status != B_OK)
955 		return status;
956 
957 	// copy the buffer back -- determine the total buffer size first
958 	size_t sizeToCopy = 0;
959 	struct dirent* entry = buffer;
960 	for (uint32 i = 0; i < count; i++) {
961 		size_t length = entry->d_reclen;
962 		sizeToCopy += length;
963 		entry = (struct dirent*)((uint8*)entry + length);
964 	}
965 
966 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
967 		return B_BAD_ADDRESS;
968 
969 	return count;
970 }
971 
972 
973 status_t
974 _user_rewind_dir(int fd)
975 {
976 	struct file_descriptor* descriptor;
977 	status_t status;
978 
979 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
980 
981 	descriptor = get_fd(get_current_io_context(false), fd);
982 	if (descriptor == NULL)
983 		return B_FILE_ERROR;
984 
985 	if (descriptor->ops->fd_rewind_dir)
986 		status = descriptor->ops->fd_rewind_dir(descriptor);
987 	else
988 		status = B_UNSUPPORTED;
989 
990 	put_fd(descriptor);
991 	return status;
992 }
993 
994 
995 status_t
996 _user_close(int fd)
997 {
998 	return common_close(fd, false);
999 }
1000 
1001 
1002 int
1003 _user_dup(int fd)
1004 {
1005 	return dup_fd(fd, false);
1006 }
1007 
1008 
1009 int
1010 _user_dup2(int ofd, int nfd)
1011 {
1012 	return dup2_fd(ofd, nfd, false);
1013 }
1014 
1015 
1016 //	#pragma mark - Kernel calls
1017 
1018 
1019 ssize_t
1020 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1021 {
1022 	if (pos < -1)
1023 		return B_BAD_VALUE;
1024 
1025 	FDGetter fdGetter;
1026 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1027 
1028 	if (!descriptor)
1029 		return B_FILE_ERROR;
1030 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1031 		return B_FILE_ERROR;
1032 
1033 	bool movePosition = false;
1034 	if (pos == -1) {
1035 		pos = descriptor->pos;
1036 		movePosition = true;
1037 	}
1038 
1039 	SyscallFlagUnsetter _;
1040 
1041 	if (descriptor->ops->fd_read == NULL)
1042 		return B_BAD_VALUE;
1043 
1044 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1045 		&length);
1046 	if (bytesRead >= B_OK) {
1047 		if (length > SSIZE_MAX)
1048 			bytesRead = SSIZE_MAX;
1049 		else
1050 			bytesRead = (ssize_t)length;
1051 
1052 		if (movePosition)
1053 			descriptor->pos = pos + length;
1054 	}
1055 
1056 	return bytesRead;
1057 }
1058 
1059 
1060 ssize_t
1061 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1062 {
1063 	bool movePosition = false;
1064 	status_t status;
1065 	uint32 i;
1066 
1067 	if (pos < -1)
1068 		return B_BAD_VALUE;
1069 
1070 	FDGetter fdGetter;
1071 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1072 
1073 	if (!descriptor)
1074 		return B_FILE_ERROR;
1075 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1076 		return B_FILE_ERROR;
1077 
1078 	if (pos == -1) {
1079 		pos = descriptor->pos;
1080 		movePosition = true;
1081 	}
1082 
1083 	if (descriptor->ops->fd_read == NULL)
1084 		return B_BAD_VALUE;
1085 
1086 	SyscallFlagUnsetter _;
1087 
1088 	ssize_t bytesRead = 0;
1089 
1090 	for (i = 0; i < count; i++) {
1091 		size_t length = vecs[i].iov_len;
1092 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1093 			&length);
1094 		if (status != B_OK) {
1095 			bytesRead = status;
1096 			break;
1097 		}
1098 
1099 		if ((uint64)bytesRead + length > SSIZE_MAX)
1100 			bytesRead = SSIZE_MAX;
1101 		else
1102 			bytesRead += (ssize_t)length;
1103 
1104 		pos += vecs[i].iov_len;
1105 	}
1106 
1107 	if (movePosition)
1108 		descriptor->pos = pos;
1109 
1110 	return bytesRead;
1111 }
1112 
1113 
1114 ssize_t
1115 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1116 {
1117 	if (pos < -1)
1118 		return B_BAD_VALUE;
1119 
1120 	FDGetter fdGetter;
1121 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1122 
1123 	if (descriptor == NULL)
1124 		return B_FILE_ERROR;
1125 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1126 		return B_FILE_ERROR;
1127 
1128 	bool movePosition = false;
1129 	if (pos == -1) {
1130 		pos = descriptor->pos;
1131 		movePosition = true;
1132 	}
1133 
1134 	if (descriptor->ops->fd_write == NULL)
1135 		return B_BAD_VALUE;
1136 
1137 	SyscallFlagUnsetter _;
1138 
1139 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1140 		&length);
1141 	if (bytesWritten >= B_OK) {
1142 		if (length > SSIZE_MAX)
1143 			bytesWritten = SSIZE_MAX;
1144 		else
1145 			bytesWritten = (ssize_t)length;
1146 
1147 		if (movePosition)
1148 			descriptor->pos = pos + length;
1149 	}
1150 
1151 	return bytesWritten;
1152 }
1153 
1154 
1155 ssize_t
1156 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1157 {
1158 	bool movePosition = false;
1159 	status_t status;
1160 	uint32 i;
1161 
1162 	if (pos < -1)
1163 		return B_BAD_VALUE;
1164 
1165 	FDGetter fdGetter;
1166 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1167 
1168 	if (!descriptor)
1169 		return B_FILE_ERROR;
1170 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1171 		return B_FILE_ERROR;
1172 
1173 	if (pos == -1) {
1174 		pos = descriptor->pos;
1175 		movePosition = true;
1176 	}
1177 
1178 	if (descriptor->ops->fd_write == NULL)
1179 		return B_BAD_VALUE;
1180 
1181 	SyscallFlagUnsetter _;
1182 
1183 	ssize_t bytesWritten = 0;
1184 
1185 	for (i = 0; i < count; i++) {
1186 		size_t length = vecs[i].iov_len;
1187 		status = descriptor->ops->fd_write(descriptor, pos,
1188 			vecs[i].iov_base, &length);
1189 		if (status != B_OK) {
1190 			bytesWritten = status;
1191 			break;
1192 		}
1193 
1194 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1195 			bytesWritten = SSIZE_MAX;
1196 		else
1197 			bytesWritten += (ssize_t)length;
1198 
1199 		pos += vecs[i].iov_len;
1200 	}
1201 
1202 	if (movePosition)
1203 		descriptor->pos = pos;
1204 
1205 	return bytesWritten;
1206 }
1207 
1208 
1209 off_t
1210 _kern_seek(int fd, off_t pos, int seekType)
1211 {
1212 	struct file_descriptor* descriptor;
1213 
1214 	descriptor = get_fd(get_current_io_context(true), fd);
1215 	if (!descriptor)
1216 		return B_FILE_ERROR;
1217 
1218 	if (descriptor->ops->fd_seek)
1219 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1220 	else
1221 		pos = ESPIPE;
1222 
1223 	put_fd(descriptor);
1224 	return pos;
1225 }
1226 
1227 
1228 status_t
1229 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1230 {
1231 	TRACE(("kern_ioctl: fd %d\n", fd));
1232 
1233 	SyscallFlagUnsetter _;
1234 
1235 	return fd_ioctl(true, fd, op, buffer, length);
1236 }
1237 
1238 
1239 ssize_t
1240 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1241 	uint32 maxCount)
1242 {
1243 	struct file_descriptor* descriptor;
1244 	ssize_t retval;
1245 
1246 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1247 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1248 
1249 	struct io_context* ioContext = get_current_io_context(true);
1250 	descriptor = get_fd(ioContext, fd);
1251 	if (descriptor == NULL)
1252 		return B_FILE_ERROR;
1253 
1254 	if (descriptor->ops->fd_read_dir) {
1255 		uint32 count = maxCount;
1256 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1257 			bufferSize, &count);
1258 		if (retval >= 0)
1259 			retval = count;
1260 	} else
1261 		retval = B_UNSUPPORTED;
1262 
1263 	put_fd(descriptor);
1264 	return retval;
1265 }
1266 
1267 
1268 status_t
1269 _kern_rewind_dir(int fd)
1270 {
1271 	struct file_descriptor* descriptor;
1272 	status_t status;
1273 
1274 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1275 
1276 	descriptor = get_fd(get_current_io_context(true), fd);
1277 	if (descriptor == NULL)
1278 		return B_FILE_ERROR;
1279 
1280 	if (descriptor->ops->fd_rewind_dir)
1281 		status = descriptor->ops->fd_rewind_dir(descriptor);
1282 	else
1283 		status = B_UNSUPPORTED;
1284 
1285 	put_fd(descriptor);
1286 	return status;
1287 }
1288 
1289 
1290 status_t
1291 _kern_close(int fd)
1292 {
1293 	return common_close(fd, true);
1294 }
1295 
1296 
1297 int
1298 _kern_dup(int fd)
1299 {
1300 	return dup_fd(fd, true);
1301 }
1302 
1303 
1304 int
1305 _kern_dup2(int ofd, int nfd)
1306 {
1307 	return dup2_fd(ofd, nfd, true);
1308 }
1309 
1310