xref: /haiku/src/system/kernel/fs/fd.cpp (revision 90c3b9bf9fe633e4c6a59d42c11c3f523183c553)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>

#include <OS.h>

#include <AutoDeleter.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


struct FDGetterLocking {
	inline bool Lock(file_descriptor* /*lockable*/)
	{
		return false;
	}

	inline void Unlock(file_descriptor* lockable)
	{
		put_fd(lockable);
	}
};

class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
public:
	inline FDGetter()
		: AutoLocker<file_descriptor, FDGetterLocking>()
	{
	}

	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
		: AutoLocker<file_descriptor, FDGetterLocking>(
			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
	{
	}

	inline file_descriptor* SetTo(io_context* context, int fd,
		bool contextLocked = false)
	{
		file_descriptor* descriptor
			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
		return descriptor;
	}

	inline file_descriptor* SetTo(int fd, bool kernel,
		bool contextLocked = false)
	{
		return SetTo(get_current_io_context(kernel), fd, contextLocked);
	}

	inline file_descriptor* FD() const
	{
		return fLockable;
	}
};
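

// A minimal FDGetter usage sketch (hypothetical caller): the getter
// resolves an FD to its file_descriptor and put_fd()s it automatically
// when it goes out of scope, so early returns cannot leak the reference.
//
//	static status_t
//	example_op(int fd, bool kernel)
//	{
//		FDGetter fdGetter;
//		file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
//		if (descriptor == NULL)
//			return B_FILE_ERROR;
//		// ... use the descriptor ...
//		return B_OK;
//			// put_fd() runs when fdGetter is destroyed
//	}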


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*! Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = 0;

	return descriptor;
}


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
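

// Layout sketch of the close-on-exec bitmap used above: one bit per FD,
// grouped eight to a byte. For fd = 10, the flag lives in
// fds_close_on_exec[1] at bit 2, since 10 / 8 == 1 and 10 & 7 == 2:
//
//	fd_set_close_on_exec(context, 10, true);
//	// fd_close_on_exec(context, 10) now returns true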


/*!	Searches for a free slot in the FD table of the provided I/O context,
	and inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}
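

// The firstIndex parameter allows F_DUPFD-style allocation, where the
// slot search starts at a caller-supplied minimum index. A hypothetical
// caller (sketch, not the actual fcntl() implementation):
//
//	int newFD = new_fd_etc(context, descriptor, minimumFD);
//	if (newFD < 0)
//		return newFD;	// e.g. B_NO_MORE_FDS if the table is full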


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}


/*!	Decrements the descriptor's reference count, and frees all resources
	once it is no longer in use.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		free(descriptor);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->open_count = -1;
		descriptor->ref_count = -1;
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}
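

// Reference discipline sketch (assumed usage pattern): every successful
// get_fd() must be balanced by exactly one put_fd(). The table slot holds
// its own reference, which close_fd_index() drops after remove_fd() has
// cleared the slot.
//
//	file_descriptor* descriptor = get_fd(context, fd);
//	if (descriptor != NULL) {
//		// ... operate on the descriptor ...
//		put_fd(descriptor);
//			// may trigger fd_free() when this was the last reference
//	}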


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct file_descriptor* descriptor)
{
	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	Marks the descriptor as disconnected: its underlying object will be
	closed and freed as soon as possible (in one of the next calls to
	put_fd() - get_fd() will no longer succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume was removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// Disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			descriptor = NULL;
		else {
			TFD(GetFD(context, fd, descriptor));
			inc_fd_ref_count(descriptor);
		}
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	mutex_unlock(&context->io_mutex);

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; this check cannot be done earlier,
	// because we always want to return an error on invalid
	// handles
	select_info* selectInfos = NULL;
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		deselect_select_infos(evicted, selectInfos, true);
		close_fd(evicted);
		put_fd(evicted);
	}

	return newfd;
}
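

// Why dup2() is implemented directly (illustrative): with the naive
// sequence
//
//	close(newfd);
//	fcntl(oldfd, F_DUPFD, newfd);
//
// another thread could allocate newfd between the two calls. Holding
// io_mutex across the eviction and the table update above closes that
// window, so the replacement of newfd is atomic.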


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference now belongs to the slot
		descriptorPutter.Detach();
	}

	return result;
}


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	struct file_descriptor* descriptor;
	int status;

	descriptor = get_fd(get_current_io_context(kernelFD), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	put_fd(descriptor);
	return status;
}


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		notify_select_events(info, B_EVENT_INVALID);
		info = info->next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		return notify_select_events(info, eventsToSelect);
	}

	// We need the FD to stay open while we're doing this, so that
	// select()/deselect() won't be called on it after it has been closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor, event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor, info, false);

		// Release our open reference of the descriptor.
		close_fd(descriptor);
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	atomic_add(&info->sync->ref_count, 1);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}
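

// A sketch of how a caller might drive select_fd()/deselect_fd() (assumed
// usage, not the actual wait_for_objects implementation): one select_info
// per FD, all sharing one select_sync, registered before blocking and
// removed afterwards.
//
//	for (int i = 0; i < fdCount; i++)
//		select_fd(fds[i], &infos[i], kernel);
//	// ... wait for notify_select_events() on the shared sync object ...
//	for (int i = 0; i < fdCount; i++)
//		deselect_fd(fds[i], &infos[i], kernel);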


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor, event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}


struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition)
		descriptor->pos = pos + length;

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}
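

// Position semantics, as implemented above: pos == -1 means "use and
// advance the descriptor's current position" (read()/write() style),
// while pos >= 0 performs positioned I/O and leaves descriptor->pos
// untouched (pread()/pwrite() style). For example:
//
//	_user_read(fd, -1, buffer, size);	// reads at and advances descriptor->pos
//	_user_read(fd, 100, buffer, size);	// reads at offset 100, pos unchanged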


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (!IS_USER_ADDRESS(userVecs))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	// prevent integer overflow exploit in malloc()
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
	if (vecs == NULL)
		return B_NO_MEMORY;
	MemoryDeleter _(vecs);

	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
		return B_BAD_ADDRESS;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (uint32 i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor, pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesTransferred;
}
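

// Error semantics of the vector loop above: an error on the very first
// vector is returned as-is, but once any bytes have been transferred the
// error is swallowed and the short count is reported instead, matching
// the usual readv()/writev() contract. Hypothetical outcome sketch:
//
//	iovec vecs[2] = { { bufferA, 512 }, { bufferB, 512 } };
//	ssize_t transferred = _user_readv(fd, -1, vecs, 2);
//	// transferred may be less than 1024 if the second transfer fails
//	// or comes up short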


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(false), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	TRACE(("user_ioctl: fd %d\n", fd));

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%lu)\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	struct dirent* entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry = (struct dirent*)((uint8*)entry + length);
	}

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}
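

// The size computation above walks entries by d_reclen, the same way a
// consumer of the returned buffer would iterate it (sketch, assuming a
// filled buffer and entry count):
//
//	struct dirent* entry = buffer;
//	for (uint32 i = 0; i < count; i++) {
//		dprintf("%s\n", entry->d_name);
//		entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
//	}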


status_t
_user_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
			&length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (descriptor == NULL)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
		&length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor, pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	struct file_descriptor* descriptor;
	ssize_t retval;

	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%lu)\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	descriptor = get_fd(ioContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	put_fd(descriptor);
	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}