xref: /haiku/src/system/kernel/fs/fd.cpp (revision a5bf12376daeded4049521eb17a6cc41192250d9)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include <syscalls.h>
21 #include <syscall_restart.h>
22 #include <util/AutoLock.h>
23 #include <vfs.h>
24 #include <wait_for_objects.h>
25 
26 #include "vfs_tracing.h"
27 
28 
29 //#define TRACE_FD
30 #ifdef TRACE_FD
31 #	define TRACE(x) dprintf x
32 #else
33 #	define TRACE(x)
34 #endif
35 
36 
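// Upper bound for the temporary heap buffer allocated in _user_read_dir()
// below; larger user-supplied buffer sizes are silently clamped to this value.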
37 static const size_t kMaxReadDirBufferSize = 64 * 1024;
38 
39 
40 static struct file_descriptor* get_fd_locked(struct io_context* context,
41 	int fd);
42 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
43 static void deselect_select_infos(file_descriptor* descriptor,
44 	select_info* infos);
45 
46 
47 struct FDGetterLocking {
48 	inline bool Lock(file_descriptor* /*lockable*/)
49 	{
50 		return false;
51 	}
52 
53 	inline void Unlock(file_descriptor* lockable)
54 	{
55 		put_fd(lockable);
56 	}
57 };
58 
59 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
60 public:
61 	inline FDGetter()
62 		: AutoLocker<file_descriptor, FDGetterLocking>()
63 	{
64 	}
65 
66 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
67 		: AutoLocker<file_descriptor, FDGetterLocking>(
68 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
69 	{
70 	}
71 
72 	inline file_descriptor* SetTo(io_context* context, int fd,
73 		bool contextLocked = false)
74 	{
75 		file_descriptor* descriptor
76 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
77 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
78 		return descriptor;
79 	}
80 
81 	inline file_descriptor* SetTo(int fd, bool kernel,
82 		bool contextLocked = false)
83 	{
84 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
85 	}
86 
87 	inline file_descriptor* FD() const
88 	{
89 		return fLockable;
90 	}
91 };
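
// Typical FDGetter usage - a sketch that mirrors the call sites further down
// in this file; the getter's destructor releases the reference via put_fd():
//
//	FDGetter fdGetter;
//	struct file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
//	if (descriptor == NULL)
//		return B_FILE_ERROR;
//	// ... use the descriptor; put_fd() runs automatically on scope exit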
92 
93 
94 //	#pragma mark - General fd routines
95 
96 
97 #ifdef DEBUG
98 void dump_fd(int fd, struct file_descriptor* descriptor);
99 
100 void
101 dump_fd(int fd, struct file_descriptor* descriptor)
102 {
103 	dprintf("fd[%d] = %p: type = %ld, ref_count = %ld, ops = %p, u.vnode = %p, "
104 		"u.mount = %p, cookie = %p, open_mode = %lx, pos = %Ld\n",
105 		fd, descriptor, descriptor->type, descriptor->ref_count,
106 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
107 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
108 }
109 #endif
110 
111 
112 /*! Allocates and initializes a new file_descriptor.
113 */
114 struct file_descriptor*
115 alloc_fd(void)
116 {
117 	file_descriptor* descriptor
118 		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
119 	if (descriptor == NULL)
120 		return NULL;
121 
122 	descriptor->u.vnode = NULL;
123 	descriptor->cookie = NULL;
124 	descriptor->ref_count = 1;
125 	descriptor->open_count = 0;
126 	descriptor->open_mode = 0;
127 	descriptor->pos = 0;
128 
129 	return descriptor;
130 }
131 
132 
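/*!	The close-on-exec flags are kept in a bitmap: fds_close_on_exec[fd / 8]
	holds the byte for the given FD, and bit (fd & 7) within that byte is the
	flag. fd_close_on_exec() reads the bit, fd_set_close_on_exec() sets or
	clears it.
*/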
133 bool
134 fd_close_on_exec(struct io_context* context, int fd)
135 {
136 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
137 }
138 
139 
140 void
141 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
142 {
143 	if (closeFD)
144 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
145 	else
146 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
147 }
148 
149 
150 /*!	Searches for a free slot in the FD table of the provided I/O context,
151 	and inserts the specified descriptor into it.
152 */
153 int
154 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
155 	int firstIndex)
156 {
157 	int fd = -1;
158 	uint32 i;
159 
160 	mutex_lock(&context->io_mutex);
161 
162 	for (i = firstIndex; i < context->table_size; i++) {
163 		if (!context->fds[i]) {
164 			fd = i;
165 			break;
166 		}
167 	}
168 	if (fd < 0) {
169 		fd = B_NO_MORE_FDS;
170 		goto err;
171 	}
172 
173 	TFD(NewFD(context, fd, descriptor));
174 
175 	context->fds[fd] = descriptor;
176 	context->num_used_fds++;
177 	atomic_add(&descriptor->open_count, 1);
178 
179 err:
180 	mutex_unlock(&context->io_mutex);
181 
182 	return fd;
183 }
184 
185 
186 int
187 new_fd(struct io_context* context, struct file_descriptor* descriptor)
188 {
189 	return new_fd_etc(context, descriptor, 0);
190 }
191 
192 
193 /*!	Decrements the descriptor's reference count and frees all resources
194 	when it is no longer in use.
195 */
196 void
197 put_fd(struct file_descriptor* descriptor)
198 {
199 	int32 previous = atomic_add(&descriptor->ref_count, -1);
200 
201 	TFD(PutFD(descriptor));
202 
203 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
204 		descriptor, descriptor->ref_count, descriptor->cookie));
205 
206 	// free the descriptor if we don't need it anymore
207 	if (previous == 1) {
208 		// free the underlying object
209 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
210 			descriptor->ops->fd_free(descriptor);
211 
212 		free(descriptor);
213 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
214 		&& previous - 1 == descriptor->open_count
215 		&& descriptor->ops != NULL) {
216 		// The descriptor has been disconnected - it cannot be
217 		// accessed anymore, so close it now (no one is currently
218 		// accessing this descriptor).
219 
220 		if (descriptor->ops->fd_close)
221 			descriptor->ops->fd_close(descriptor);
222 		if (descriptor->ops->fd_free)
223 			descriptor->ops->fd_free(descriptor);
224 
225 		// prevent this descriptor from being closed/freed again
226 		descriptor->open_count = -1;
227 		descriptor->ref_count = -1;
228 		descriptor->ops = NULL;
229 		descriptor->u.vnode = NULL;
230 
231 		// the file descriptor is kept intact, so that it's not
232 		// reused until someone explicitly closes it
233 	}
234 }
235 
236 
237 /*!	Decrements the open counter of the file descriptor and invokes
238 	its close hook when appropriate.
239 */
240 void
241 close_fd(struct file_descriptor* descriptor)
242 {
243 	if (atomic_add(&descriptor->open_count, -1) == 1) {
244 		vfs_unlock_vnode_if_locked(descriptor);
245 
246 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
247 			descriptor->ops->fd_close(descriptor);
248 	}
249 }
250 
251 
252 status_t
253 close_fd_index(struct io_context* context, int fd)
254 {
255 	struct file_descriptor* descriptor = remove_fd(context, fd);
256 
257 	if (descriptor == NULL)
258 		return B_FILE_ERROR;
259 
260 	close_fd(descriptor);
261 	put_fd(descriptor);
262 		// the reference associated with the slot
263 
264 	return B_OK;
265 }
266 
267 
268 /*!	This descriptor's underlying object will be closed and freed as soon as
269 	possible (in one of the next calls to put_fd() - get_fd() will no longer
270 	succeed on this descriptor).
271 	This is useful if the underlying object is gone, for instance when a
272 	(mounted) volume has been removed unexpectedly.
273 */
274 void
275 disconnect_fd(struct file_descriptor* descriptor)
276 {
277 	descriptor->open_mode |= O_DISCONNECTED;
278 }
279 
280 
281 void
282 inc_fd_ref_count(struct file_descriptor* descriptor)
283 {
284 	atomic_add(&descriptor->ref_count, 1);
285 }
286 
287 
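/*!	Looks up the descriptor for \a fd in the given I/O context, which must
	already be locked by the caller. Returns NULL for out-of-range or
	disconnected FDs; otherwise a reference is acquired that the caller has
	to release with put_fd().
*/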
288 static struct file_descriptor*
289 get_fd_locked(struct io_context* context, int fd)
290 {
291 	if (fd < 0 || (uint32)fd >= context->table_size)
292 		return NULL;
293 
294 	struct file_descriptor* descriptor = context->fds[fd];
295 
296 	if (descriptor != NULL) {
297 		// Disconnected descriptors cannot be accessed anymore
298 		if (descriptor->open_mode & O_DISCONNECTED)
299 			descriptor = NULL;
300 		else {
301 			TFD(GetFD(context, fd, descriptor));
302 			inc_fd_ref_count(descriptor);
303 		}
304 	}
305 
306 	return descriptor;
307 }
308 
309 
310 struct file_descriptor*
311 get_fd(struct io_context* context, int fd)
312 {
313 	MutexLocker _(context->io_mutex);
314 
315 	return get_fd_locked(context, fd);
316 }
317 
318 
319 struct file_descriptor*
320 get_open_fd(struct io_context* context, int fd)
321 {
322 	MutexLocker _(context->io_mutex);
323 
324 	file_descriptor* descriptor = get_fd_locked(context, fd);
325 	if (descriptor == NULL)
326 		return NULL;
327 
328 	atomic_add(&descriptor->open_count, 1);
329 
330 	return descriptor;
331 }
332 
333 
334 /*!	Removes the file descriptor from the specified slot.
335 */
336 static struct file_descriptor*
337 remove_fd(struct io_context* context, int fd)
338 {
339 	struct file_descriptor* descriptor = NULL;
340 
341 	if (fd < 0)
342 		return NULL;
343 
344 	mutex_lock(&context->io_mutex);
345 
346 	if ((uint32)fd < context->table_size)
347 		descriptor = context->fds[fd];
348 
349 	select_info* selectInfos = NULL;
350 	bool disconnected = false;
351 
352 	if (descriptor != NULL)	{
353 		// fd is valid
354 		TFD(RemoveFD(context, fd, descriptor));
355 
356 		context->fds[fd] = NULL;
357 		fd_set_close_on_exec(context, fd, false);
358 		context->num_used_fds--;
359 
360 		selectInfos = context->select_infos[fd];
361 		context->select_infos[fd] = NULL;
362 
363 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
364 	}
365 
366 	mutex_unlock(&context->io_mutex);
367 
368 	if (selectInfos != NULL)
369 		deselect_select_infos(descriptor, selectInfos);
370 
371 	return disconnected ? NULL : descriptor;
372 }
373 
374 
375 static int
376 dup_fd(int fd, bool kernel)
377 {
378 	struct io_context* context = get_current_io_context(kernel);
379 	struct file_descriptor* descriptor;
380 	int status;
381 
382 	TRACE(("dup_fd: fd = %d\n", fd));
383 
384 	// Try to get the fd structure
385 	descriptor = get_fd(context, fd);
386 	if (descriptor == NULL)
387 		return B_FILE_ERROR;
388 
389 	// now put the fd in place
390 	status = new_fd(context, descriptor);
391 	if (status < 0)
392 		put_fd(descriptor);
393 	else {
394 		mutex_lock(&context->io_mutex);
395 		fd_set_close_on_exec(context, status, false);
396 		mutex_unlock(&context->io_mutex);
397 	}
398 
399 	return status;
400 }
401 
402 
403 /*!	POSIX says this should be the same as:
404 		close(newfd);
405 		fcntl(oldfd, F_DUPFD, newfd);
406 
407 	We do dup2() directly to be thread-safe.
408 */
409 static int
410 dup2_fd(int oldfd, int newfd, bool kernel)
411 {
412 	struct file_descriptor* evicted = NULL;
413 	struct io_context* context;
414 
415 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
416 
417 	// quick check
418 	if (oldfd < 0 || newfd < 0)
419 		return B_FILE_ERROR;
420 
421 	// Get current I/O context and lock it
422 	context = get_current_io_context(kernel);
423 	mutex_lock(&context->io_mutex);
424 
425 	// Check if the fds are valid (mutex must be locked because
426 	// the table size could be changed)
427 	if ((uint32)oldfd >= context->table_size
428 		|| (uint32)newfd >= context->table_size
429 		|| context->fds[oldfd] == NULL) {
430 		mutex_unlock(&context->io_mutex);
431 		return B_FILE_ERROR;
432 	}
433 
434 	// Check for identity; note that this check cannot be done earlier,
435 	// because we always want to return an error on invalid
436 	// handles.
437 	select_info* selectInfos = NULL;
438 	if (oldfd != newfd) {
439 		// Now do the work
440 		TFD(Dup2FD(context, oldfd, newfd));
441 
442 		evicted = context->fds[newfd];
443 		selectInfos = context->select_infos[newfd];
444 		context->select_infos[newfd] = NULL;
445 		atomic_add(&context->fds[oldfd]->ref_count, 1);
446 		atomic_add(&context->fds[oldfd]->open_count, 1);
447 		context->fds[newfd] = context->fds[oldfd];
448 
449 		if (evicted == NULL)
450 			context->num_used_fds++;
451 	}
452 
453 	fd_set_close_on_exec(context, newfd, false);
454 
455 	mutex_unlock(&context->io_mutex);
456 
457 	// Say bye bye to the evicted fd
458 	if (evicted) {
459 		deselect_select_infos(evicted, selectInfos);
460 		close_fd(evicted);
461 		put_fd(evicted);
462 	}
463 
464 	return newfd;
465 }
466 
467 
468 /*!	Duplicates an FD from another team to this/the kernel team.
469 	\param fromTeam The team which owns the FD.
470 	\param fd The FD to duplicate.
471 	\param kernel If \c true, the new FD will be created in the kernel team;
472 			otherwise in the current userland team.
473 	\return The newly created FD or an error code, if something went wrong.
474 */
475 int
476 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
477 {
478 	// get the I/O context for the team in question
479 	InterruptsSpinLocker teamsLocker(gTeamSpinlock);
480 	struct team* team = team_get_team_struct_locked(fromTeam);
481 	if (team == NULL)
482 		return B_BAD_TEAM_ID;
483 
484 	io_context* fromContext = team->io_context;
485 	vfs_get_io_context(fromContext);
486 
487 	teamsLocker.Unlock();
488 
489 	CObjectDeleter<io_context> _(fromContext, vfs_put_io_context);
490 
491 	// get the file descriptor
492 	file_descriptor* descriptor = get_fd(fromContext, fd);
493 	if (descriptor == NULL)
494 		return B_FILE_ERROR;
495 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
496 
497 	// create a new FD in the target I/O context
498 	int result = new_fd(get_current_io_context(kernel), descriptor);
499 	if (result >= 0) {
500 		// the descriptor reference now belongs to the slot
501 		descriptorPutter.Detach();
502 	}
503 
504 	return result;
505 }
506 
507 
508 static status_t
509 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
510 {
511 	struct file_descriptor* descriptor;
512 	int status;
513 
514 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
515 	if (descriptor == NULL)
516 		return B_FILE_ERROR;
517 
518 	if (descriptor->ops->fd_ioctl)
519 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
520 	else
521 		status = EOPNOTSUPP;
522 
523 	put_fd(descriptor);
524 	return status;
525 }
526 
527 
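/*!	Deselects all events of the given select_info list on \a descriptor and
	notifies the respective waiters with B_EVENT_INVALID. The sync object
	reference acquired for each info in select_fd() is released.
*/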
528 static void
529 deselect_select_infos(file_descriptor* descriptor, select_info* infos)
530 {
531 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
532 
533 	select_info* info = infos;
534 	while (info != NULL) {
535 		select_sync* sync = info->sync;
536 
537 		// deselect the selected events
538 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
539 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
540 			for (uint16 event = 1; event < 16; event++) {
541 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
542 					descriptor->ops->fd_deselect(descriptor, event,
543 						(selectsync*)info);
544 				}
545 			}
546 		}
547 
548 		notify_select_events(info, B_EVENT_INVALID);
549 		info = info->next;
550 		put_select_sync(sync);
551 	}
552 }
553 
554 
555 status_t
556 select_fd(int32 fd, struct select_info* info, bool kernel)
557 {
558 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
559 		info->sync, info->selected_events));
560 
561 	FDGetter fdGetter;
562 		// define before the context locker, so it will be destroyed after it
563 
564 	io_context* context = get_current_io_context(kernel);
565 	MutexLocker locker(context->io_mutex);
566 
567 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
568 	if (descriptor == NULL)
569 		return B_FILE_ERROR;
570 
571 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
572 
573 	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
574 		// if the I/O subsystem doesn't support select(), we will
575 		// immediately notify the select call
576 		return notify_select_events(info, eventsToSelect);
577 	}
578 
579 	// We need the FD to stay open while we're doing this, so that
580 	// select()/deselect() won't be called on it after it has been closed.
581 	atomic_add(&descriptor->open_count, 1);
582 
583 	locker.Unlock();
584 
585 	// select any events asked for
586 	uint32 selectedEvents = 0;
587 
588 	for (uint16 event = 1; event < 16; event++) {
589 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
590 			&& descriptor->ops->fd_select(descriptor, event,
591 				(selectsync*)info) == B_OK) {
592 			selectedEvents |= SELECT_FLAG(event);
593 		}
594 	}
595 	info->selected_events = selectedEvents
596 		| (info->selected_events & B_EVENT_INVALID);
597 
598 	// Add the info to the I/O context, even if nothing has been selected --
599 	// we always support B_EVENT_INVALID.
600 	locker.Lock();
601 	if (context->fds[fd] != descriptor) {
602 		// Someone close()d the FD slot in the meantime; deselect all
603 		// events.
604 		info->next = NULL;
605 		deselect_select_infos(descriptor, info);
606 
607 		// Release our open reference of the descriptor.
608 		close_fd(descriptor);
609 		return B_FILE_ERROR;
610 	}
611 
612 	// The FD index hasn't changed, so we add the select info to the table.
613 
614 	info->next = context->select_infos[fd];
615 	context->select_infos[fd] = info;
616 
617 	// As long as the info is in the list, we keep a reference to the sync
618 	// object.
619 	atomic_add(&info->sync->ref_count, 1);
620 
621 	// Finally release our open reference. It is safe just to decrement,
622 	// since as long as the descriptor is associated with the slot,
623 	// someone else still has it open.
624 	atomic_add(&descriptor->open_count, -1);
625 
626 	return B_OK;
627 }
628 
629 
630 status_t
631 deselect_fd(int32 fd, struct select_info* info, bool kernel)
632 {
633 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
634 		info->sync, info->selected_events));
635 
636 	FDGetter fdGetter;
637 		// define before the context locker, so it will be destroyed after it
638 
639 	io_context* context = get_current_io_context(kernel);
640 	MutexLocker locker(context->io_mutex);
641 
642 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
643 	if (descriptor == NULL)
644 		return B_FILE_ERROR;
645 
646 	// remove the info from the IO context
647 
648 	select_info** infoLocation = &context->select_infos[fd];
649 	while (*infoLocation != NULL && *infoLocation != info)
650 		infoLocation = &(*infoLocation)->next;
651 
652 	// If not found, someone else beat us to it.
653 	if (*infoLocation != info)
654 		return B_OK;
655 
656 	*infoLocation = info->next;
657 
658 	locker.Unlock();
659 
660 	// deselect the selected events
661 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
662 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
663 		for (uint16 event = 1; event < 16; event++) {
664 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
665 				descriptor->ops->fd_deselect(descriptor, event,
666 					(selectsync*)info);
667 			}
668 		}
669 	}
670 
671 	put_select_sync(info->sync);
672 
673 	return B_OK;
674 }
675 
676 
677 /*!	This function checks if the specified fd is valid in the current
678 	context. It can be used for a quick check; the fd is not locked
679 	so it could become invalid immediately after this check.
680 */
681 bool
682 fd_is_valid(int fd, bool kernel)
683 {
684 	struct file_descriptor* descriptor
685 		= get_fd(get_current_io_context(kernel), fd);
686 	if (descriptor == NULL)
687 		return false;
688 
689 	put_fd(descriptor);
690 	return true;
691 }
692 
693 
694 struct vnode*
695 fd_vnode(struct file_descriptor* descriptor)
696 {
697 	switch (descriptor->type) {
698 		case FDTYPE_FILE:
699 		case FDTYPE_DIR:
700 		case FDTYPE_ATTR_DIR:
701 		case FDTYPE_ATTR:
702 			return descriptor->u.vnode;
703 	}
704 
705 	return NULL;
706 }
707 
708 
709 static status_t
710 common_close(int fd, bool kernel)
711 {
712 	return close_fd_index(get_current_io_context(kernel), fd);
713 }
714 
715 
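/*!	Common back end for _user_read() and _user_write(). A \a pos of -1 means
	"read/write at the descriptor's current position and advance it";
	otherwise the descriptor's position is left untouched.
*/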
716 static ssize_t
717 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
718 {
719 	if (!IS_USER_ADDRESS(buffer))
720 		return B_BAD_ADDRESS;
721 
722 	if (pos < -1)
723 		return B_BAD_VALUE;
724 
725 	FDGetter fdGetter;
726 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
727 	if (!descriptor)
728 		return B_FILE_ERROR;
729 
730 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
731 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
732 		return B_FILE_ERROR;
733 	}
734 
735 	bool movePosition = false;
736 	if (pos == -1) {
737 		pos = descriptor->pos;
738 		movePosition = true;
739 	}
740 
741 	if (write ? descriptor->ops->fd_write == NULL
742 			: descriptor->ops->fd_read == NULL) {
743 		return B_BAD_VALUE;
744 	}
745 
746 	SyscallRestartWrapper<status_t> status;
747 
748 	if (write)
749 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
750 	else
751 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
752 
753 	if (status != B_OK)
754 		return status;
755 
756 	if (movePosition)
757 		descriptor->pos = pos + length;
758 
759 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
760 }
761 
762 
763 static ssize_t
764 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
765 	bool write)
766 {
767 	if (!IS_USER_ADDRESS(userVecs))
768 		return B_BAD_ADDRESS;
769 
770 	if (pos < -1)
771 		return B_BAD_VALUE;
772 
773 	// prevent integer overflow exploit in malloc()
774 	if (count > IOV_MAX)
775 		return B_BAD_VALUE;
776 
777 	FDGetter fdGetter;
778 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
779 	if (!descriptor)
780 		return B_FILE_ERROR;
781 
782 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
783 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
784 		return B_FILE_ERROR;
785 	}
786 
787 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
788 	if (vecs == NULL)
789 		return B_NO_MEMORY;
790 	MemoryDeleter _(vecs);
791 
792 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
793 		return B_BAD_ADDRESS;
794 
795 	bool movePosition = false;
796 	if (pos == -1) {
797 		pos = descriptor->pos;
798 		movePosition = true;
799 	}
800 
801 	if (write ? descriptor->ops->fd_write == NULL
802 			: descriptor->ops->fd_read == NULL) {
803 		return B_BAD_VALUE;
804 	}
805 
806 	SyscallRestartWrapper<status_t> status;
807 
808 	ssize_t bytesTransferred = 0;
809 	for (uint32 i = 0; i < count; i++) {
810 		size_t length = vecs[i].iov_len;
811 		if (write) {
812 			status = descriptor->ops->fd_write(descriptor, pos,
813 				vecs[i].iov_base, &length);
814 		} else {
815 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
816 				&length);
817 		}
818 
819 		if (status != B_OK) {
820 			if (bytesTransferred == 0)
821 				return status;
822 			status = B_OK;
823 			break;
824 		}
825 
826 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
827 			bytesTransferred = SSIZE_MAX;
828 		else
829 			bytesTransferred += (ssize_t)length;
830 
831 		pos += length;
832 
833 		if (length < vecs[i].iov_len)
834 			break;
835 	}
836 
837 	if (movePosition)
838 		descriptor->pos = pos;
839 
840 	return bytesTransferred;
841 }
842 
843 
844 status_t
845 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
846 {
847 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
848 
849 	return fd_ioctl(false, fd, op, buffer, length);
850 }
851 
852 
853 //	#pragma mark - User syscalls
854 
855 
856 ssize_t
857 _user_read(int fd, off_t pos, void* buffer, size_t length)
858 {
859 	return common_user_io(fd, pos, buffer, length, false);
860 }
861 
862 
863 ssize_t
864 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
865 {
866 	return common_user_vector_io(fd, pos, userVecs, count, false);
867 }
868 
869 
870 ssize_t
871 _user_write(int fd, off_t pos, const void* buffer, size_t length)
872 {
873 	return common_user_io(fd, pos, (void*)buffer, length, true);
874 }
875 
876 
877 ssize_t
878 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
879 {
880 	return common_user_vector_io(fd, pos, userVecs, count, true);
881 }
882 
883 
884 off_t
885 _user_seek(int fd, off_t pos, int seekType)
886 {
887 	syscall_64_bit_return_value();
888 
889 	struct file_descriptor* descriptor;
890 
891 	descriptor = get_fd(get_current_io_context(false), fd);
892 	if (!descriptor)
893 		return B_FILE_ERROR;
894 
895 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
896 
897 	if (descriptor->ops->fd_seek)
898 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
899 	else
900 		pos = ESPIPE;
901 
902 	put_fd(descriptor);
903 	return pos;
904 }
905 
906 
907 status_t
908 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
909 {
910 	if (!IS_USER_ADDRESS(buffer))
911 		return B_BAD_ADDRESS;
912 
913 	TRACE(("user_ioctl: fd %d\n", fd));
914 
915 	SyscallRestartWrapper<status_t> status;
916 
917 	return status = fd_ioctl(false, fd, op, buffer, length);
918 }
919 
920 
921 ssize_t
922 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
923 	uint32 maxCount)
924 {
925 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
926 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
927 
928 	if (maxCount == 0)
929 		return 0;
930 
931 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
932 		return B_BAD_ADDRESS;
933 
934 	// get I/O context and FD
935 	io_context* ioContext = get_current_io_context(false);
936 	FDGetter fdGetter;
937 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
938 	if (descriptor == NULL)
939 		return B_FILE_ERROR;
940 
941 	if (descriptor->ops->fd_read_dir == NULL)
942 		return B_UNSUPPORTED;
943 
944 	// restrict buffer size and allocate a heap buffer
945 	if (bufferSize > kMaxReadDirBufferSize)
946 		bufferSize = kMaxReadDirBufferSize;
947 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
948 	if (buffer == NULL)
949 		return B_NO_MEMORY;
950 	MemoryDeleter bufferDeleter(buffer);
951 
952 	// read the directory
953 	uint32 count = maxCount;
954 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
955 		buffer, bufferSize, &count);
956 	if (status != B_OK)
957 		return status;
958 
959 	// copy the buffer back -- determine the total buffer size first
960 	size_t sizeToCopy = 0;
961 	struct dirent* entry = buffer;
962 	for (uint32 i = 0; i < count; i++) {
963 		size_t length = entry->d_reclen;
964 		sizeToCopy += length;
965 		entry = (struct dirent*)((uint8*)entry + length);
966 	}
967 
968 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
969 		return B_BAD_ADDRESS;
970 
971 	return count;
972 }
973 
974 
975 status_t
976 _user_rewind_dir(int fd)
977 {
978 	struct file_descriptor* descriptor;
979 	status_t status;
980 
981 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
982 
983 	descriptor = get_fd(get_current_io_context(false), fd);
984 	if (descriptor == NULL)
985 		return B_FILE_ERROR;
986 
987 	if (descriptor->ops->fd_rewind_dir)
988 		status = descriptor->ops->fd_rewind_dir(descriptor);
989 	else
990 		status = EOPNOTSUPP;
991 
992 	put_fd(descriptor);
993 	return status;
994 }
995 
996 
997 status_t
998 _user_close(int fd)
999 {
1000 	return common_close(fd, false);
1001 }
1002 
1003 
1004 int
1005 _user_dup(int fd)
1006 {
1007 	return dup_fd(fd, false);
1008 }
1009 
1010 
1011 int
1012 _user_dup2(int ofd, int nfd)
1013 {
1014 	return dup2_fd(ofd, nfd, false);
1015 }
1016 
1017 
1018 //	#pragma mark - Kernel calls
1019 
1020 
1021 ssize_t
1022 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1023 {
1024 	if (pos < -1)
1025 		return B_BAD_VALUE;
1026 
1027 	FDGetter fdGetter;
1028 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1029 
1030 	if (!descriptor)
1031 		return B_FILE_ERROR;
1032 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1033 		return B_FILE_ERROR;
1034 
1035 	bool movePosition = false;
1036 	if (pos == -1) {
1037 		pos = descriptor->pos;
1038 		movePosition = true;
1039 	}
1040 
1041 	SyscallFlagUnsetter _;
1042 
1043 	if (descriptor->ops->fd_read == NULL)
1044 		return B_BAD_VALUE;
1045 
1046 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1047 		&length);
1048 	if (bytesRead >= B_OK) {
1049 		if (length > SSIZE_MAX)
1050 			bytesRead = SSIZE_MAX;
1051 		else
1052 			bytesRead = (ssize_t)length;
1053 
1054 		if (movePosition)
1055 			descriptor->pos = pos + length;
1056 	}
1057 
1058 	return bytesRead;
1059 }
1060 
1061 
1062 ssize_t
1063 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1064 {
1065 	bool movePosition = false;
1066 	status_t status;
1067 	uint32 i;
1068 
1069 	if (pos < -1)
1070 		return B_BAD_VALUE;
1071 
1072 	FDGetter fdGetter;
1073 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1074 
1075 	if (!descriptor)
1076 		return B_FILE_ERROR;
1077 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1078 		return B_FILE_ERROR;
1079 
1080 	if (pos == -1) {
1081 		pos = descriptor->pos;
1082 		movePosition = true;
1083 	}
1084 
1085 	if (descriptor->ops->fd_read == NULL)
1086 		return B_BAD_VALUE;
1087 
1088 	SyscallFlagUnsetter _;
1089 
1090 	ssize_t bytesRead = 0;
1091 
1092 	for (i = 0; i < count; i++) {
1093 		size_t length = vecs[i].iov_len;
1094 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1095 			&length);
1096 		if (status != B_OK) {
1097 			bytesRead = status;
1098 			break;
1099 		}
1100 
1101 		if ((uint64)bytesRead + length > SSIZE_MAX)
1102 			bytesRead = SSIZE_MAX;
1103 		else
1104 			bytesRead += (ssize_t)length;
1105 
1106 		pos += vecs[i].iov_len;
1107 	}
1108 
1109 	if (movePosition)
1110 		descriptor->pos = pos;
1111 
1112 	return bytesRead;
1113 }
1114 
1115 
1116 ssize_t
1117 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1118 {
1119 	if (pos < -1)
1120 		return B_BAD_VALUE;
1121 
1122 	FDGetter fdGetter;
1123 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1124 
1125 	if (descriptor == NULL)
1126 		return B_FILE_ERROR;
1127 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1128 		return B_FILE_ERROR;
1129 
1130 	bool movePosition = false;
1131 	if (pos == -1) {
1132 		pos = descriptor->pos;
1133 		movePosition = true;
1134 	}
1135 
1136 	if (descriptor->ops->fd_write == NULL)
1137 		return B_BAD_VALUE;
1138 
1139 	SyscallFlagUnsetter _;
1140 
1141 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1142 		&length);
1143 	if (bytesWritten >= B_OK) {
1144 		if (length > SSIZE_MAX)
1145 			bytesWritten = SSIZE_MAX;
1146 		else
1147 			bytesWritten = (ssize_t)length;
1148 
1149 		if (movePosition)
1150 			descriptor->pos = pos + length;
1151 	}
1152 
1153 	return bytesWritten;
1154 }
1155 
1156 
1157 ssize_t
1158 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1159 {
1160 	bool movePosition = false;
1161 	status_t status;
1162 	uint32 i;
1163 
1164 	if (pos < -1)
1165 		return B_BAD_VALUE;
1166 
1167 	FDGetter fdGetter;
1168 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1169 
1170 	if (!descriptor)
1171 		return B_FILE_ERROR;
1172 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1173 		return B_FILE_ERROR;
1174 
1175 	if (pos == -1) {
1176 		pos = descriptor->pos;
1177 		movePosition = true;
1178 	}
1179 
1180 	if (descriptor->ops->fd_write == NULL)
1181 		return B_BAD_VALUE;
1182 
1183 	SyscallFlagUnsetter _;
1184 
1185 	ssize_t bytesWritten = 0;
1186 
1187 	for (i = 0; i < count; i++) {
1188 		size_t length = vecs[i].iov_len;
1189 		status = descriptor->ops->fd_write(descriptor, pos,
1190 			vecs[i].iov_base, &length);
1191 		if (status != B_OK) {
1192 			bytesWritten = status;
1193 			break;
1194 		}
1195 
1196 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1197 			bytesWritten = SSIZE_MAX;
1198 		else
1199 			bytesWritten += (ssize_t)length;
1200 
1201 		pos += vecs[i].iov_len;
1202 	}
1203 
1204 	if (movePosition)
1205 		descriptor->pos = pos;
1206 
1207 	return bytesWritten;
1208 }
1209 
1210 
1211 off_t
1212 _kern_seek(int fd, off_t pos, int seekType)
1213 {
1214 	struct file_descriptor* descriptor;
1215 
1216 	descriptor = get_fd(get_current_io_context(true), fd);
1217 	if (!descriptor)
1218 		return B_FILE_ERROR;
1219 
1220 	if (descriptor->ops->fd_seek)
1221 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1222 	else
1223 		pos = ESPIPE;
1224 
1225 	put_fd(descriptor);
1226 	return pos;
1227 }
1228 
1229 
1230 status_t
1231 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1232 {
1233 	TRACE(("kern_ioctl: fd %d\n", fd));
1234 
1235 	SyscallFlagUnsetter _;
1236 
1237 	return fd_ioctl(true, fd, op, buffer, length);
1238 }
1239 
1240 
1241 ssize_t
1242 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1243 	uint32 maxCount)
1244 {
1245 	struct file_descriptor* descriptor;
1246 	ssize_t retval;
1247 
1248 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1249 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1250 
1251 	struct io_context* ioContext = get_current_io_context(true);
1252 	descriptor = get_fd(ioContext, fd);
1253 	if (descriptor == NULL)
1254 		return B_FILE_ERROR;
1255 
1256 	if (descriptor->ops->fd_read_dir) {
1257 		uint32 count = maxCount;
1258 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1259 			bufferSize, &count);
1260 		if (retval >= 0)
1261 			retval = count;
1262 	} else
1263 		retval = EOPNOTSUPP;
1264 
1265 	put_fd(descriptor);
1266 	return retval;
1267 }
1268 
1269 
1270 status_t
1271 _kern_rewind_dir(int fd)
1272 {
1273 	struct file_descriptor* descriptor;
1274 	status_t status;
1275 
1276 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1277 
1278 	descriptor = get_fd(get_current_io_context(true), fd);
1279 	if (descriptor == NULL)
1280 		return B_FILE_ERROR;
1281 
1282 	if (descriptor->ops->fd_rewind_dir)
1283 		status = descriptor->ops->fd_rewind_dir(descriptor);
1284 	else
1285 		status = EOPNOTSUPP;
1286 
1287 	put_fd(descriptor);
1288 	return status;
1289 }
1290 
1291 
1292 status_t
1293 _kern_close(int fd)
1294 {
1295 	return common_close(fd, true);
1296 }
1297 
1298 
1299 int
1300 _kern_dup(int fd)
1301 {
1302 	return dup_fd(fd, true);
1303 }
1304 
1305 
1306 int
1307 _kern_dup2(int ofd, int nfd)
1308 {
1309 	return dup2_fd(ofd, nfd, true);
1310 }
1311 
1312