xref: /haiku/src/system/kernel/fs/fd.cpp (revision 4bd0c1066b227cec4b79883bdef697c7a27f2e90)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2015, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include <syscalls.h>
21 #include <syscall_restart.h>
22 #include <util/AutoLock.h>
23 #include <vfs.h>
24 #include <wait_for_objects.h>
25 
26 #include "vfs_tracing.h"
27 
28 
29 //#define TRACE_FD
30 #ifdef TRACE_FD
31 #	define TRACE(x) dprintf x
32 #else
33 #	define TRACE(x)
34 #endif
35 
36 
37 static const size_t kMaxReadDirBufferSize = 64 * 1024;
38 
39 
40 static struct file_descriptor* get_fd_locked(struct io_context* context,
41 	int fd);
42 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
43 static void deselect_select_infos(file_descriptor* descriptor,
44 	select_info* infos, bool putSyncObjects);
45 
46 
47 struct FDGetterLocking {
48 	inline bool Lock(file_descriptor* /*lockable*/)
49 	{
50 		return false;
51 	}
52 
53 	inline void Unlock(file_descriptor* lockable)
54 	{
55 		put_fd(lockable);
56 	}
57 };
58 
59 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
60 public:
61 	inline FDGetter()
62 		: AutoLocker<file_descriptor, FDGetterLocking>()
63 	{
64 	}
65 
66 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
67 		: AutoLocker<file_descriptor, FDGetterLocking>(
68 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
69 	{
70 	}
71 
72 	inline file_descriptor* SetTo(io_context* context, int fd,
73 		bool contextLocked = false)
74 	{
75 		file_descriptor* descriptor
76 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
77 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
78 		return descriptor;
79 	}
80 
81 	inline file_descriptor* SetTo(int fd, bool kernel,
82 		bool contextLocked = false)
83 	{
84 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
85 	}
86 
87 	inline file_descriptor* FD() const
88 	{
89 		return fLockable;
90 	}
91 };
92 
93 
94 //	#pragma mark - General fd routines
95 
96 
97 #ifdef DEBUG
98 void dump_fd(int fd, struct file_descriptor* descriptor);
99 
100 void
101 dump_fd(int fd, struct file_descriptor* descriptor)
102 {
103 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
104 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
105 		", pos = %" B_PRId64 "\n",
106 		fd, descriptor, descriptor->type, descriptor->ref_count,
107 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
108 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
109 }
110 #endif
111 
112 
113 /*! Allocates and initializes a new file_descriptor.
114 */
115 struct file_descriptor*
116 alloc_fd(void)
117 {
118 	file_descriptor* descriptor
119 		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
120 	if (descriptor == NULL)
121 		return NULL;
122 
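	// only the generic fields are initialized here; the caller is expected to
	// set the descriptor's type and ops before using it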
123 	descriptor->u.vnode = NULL;
124 	descriptor->cookie = NULL;
125 	descriptor->ref_count = 1;
126 	descriptor->open_count = 0;
127 	descriptor->open_mode = 0;
128 	descriptor->pos = 0;
129 
130 	return descriptor;
131 }
132 
133 
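// The close-on-exec flags are kept as a bitmap: one bit per FD, eight FDs per
// byte, i.e. FD n is found in byte n / 8 at bit n & 7.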
134 bool
135 fd_close_on_exec(struct io_context* context, int fd)
136 {
137 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
138 }
139 
140 
141 void
142 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
143 {
144 	if (closeFD)
145 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
146 	else
147 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
148 }
149 
150 
151 /*!	Searches for a free slot in the FD table of the provided I/O context, and
152 	inserts the specified descriptor into it.
153 */
154 int
155 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
156 	int firstIndex)
157 {
158 	int fd = -1;
159 	uint32 i;
160 
161 	mutex_lock(&context->io_mutex);
162 
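	// search for the first free slot at or above firstIndex, so that callers
	// can request a minimum FD number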
163 	for (i = firstIndex; i < context->table_size; i++) {
164 		if (!context->fds[i]) {
165 			fd = i;
166 			break;
167 		}
168 	}
169 	if (fd < 0) {
170 		fd = B_NO_MORE_FDS;
171 		goto err;
172 	}
173 
174 	TFD(NewFD(context, fd, descriptor));
175 
176 	context->fds[fd] = descriptor;
177 	context->num_used_fds++;
178 	atomic_add(&descriptor->open_count, 1);
179 
180 err:
181 	mutex_unlock(&context->io_mutex);
182 
183 	return fd;
184 }
185 
186 
187 int
188 new_fd(struct io_context* context, struct file_descriptor* descriptor)
189 {
190 	return new_fd_etc(context, descriptor, 0);
191 }
192 
193 
194 /*!	Decrements the descriptor's reference count, and frees all of its
195 	resources once it is no longer used.
196 */
197 void
198 put_fd(struct file_descriptor* descriptor)
199 {
200 	int32 previous = atomic_add(&descriptor->ref_count, -1);
201 
202 	TFD(PutFD(descriptor));
203 
204 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
205 		descriptor, descriptor->ref_count, descriptor->cookie));
206 
207 	// free the descriptor if we don't need it anymore
208 	if (previous == 1) {
209 		// free the underlying object
210 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
211 			descriptor->ops->fd_free(descriptor);
212 
213 		free(descriptor);
214 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
215 		&& previous - 1 == descriptor->open_count
216 		&& descriptor->ops != NULL) {
217 		// the descriptor has been disconnected - it cannot
218 		// be accessed anymore, let's close it (no one is
219 		// currently accessing this descriptor)
220 
221 		if (descriptor->ops->fd_close)
222 			descriptor->ops->fd_close(descriptor);
223 		if (descriptor->ops->fd_free)
224 			descriptor->ops->fd_free(descriptor);
225 
226 		// prevent this descriptor from being closed/freed again
227 		descriptor->ops = NULL;
228 		descriptor->u.vnode = NULL;
229 
230 		// the file descriptor is kept intact, so that it's not
231 		// reused until someone explicitly closes it
232 	}
233 }
234 
235 
236 /*!	Decrements the open counter of the file descriptor and invokes
237 	its close hook when appropriate.
238 */
239 void
240 close_fd(struct file_descriptor* descriptor)
241 {
242 	if (atomic_add(&descriptor->open_count, -1) == 1) {
243 		vfs_unlock_vnode_if_locked(descriptor);
244 
245 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
246 			descriptor->ops->fd_close(descriptor);
247 	}
248 }
249 
250 
251 status_t
252 close_fd_index(struct io_context* context, int fd)
253 {
254 	struct file_descriptor* descriptor = remove_fd(context, fd);
255 
256 	if (descriptor == NULL)
257 		return B_FILE_ERROR;
258 
259 	close_fd(descriptor);
260 	put_fd(descriptor);
261 		// the reference associated with the slot
262 
263 	return B_OK;
264 }
265 
266 
267 /*!	The descriptor's underlying object will be closed and freed as soon as
268 	possible (in one of the next calls to put_fd()); get_fd() will no longer
269 	succeed on this descriptor.
270 	This is useful if the underlying object is gone, for instance when a
271 	(mounted) volume was removed unexpectedly.
272 */
273 void
274 disconnect_fd(struct file_descriptor* descriptor)
275 {
276 	descriptor->open_mode |= O_DISCONNECTED;
277 }
278 
279 
280 void
281 inc_fd_ref_count(struct file_descriptor* descriptor)
282 {
283 	atomic_add(&descriptor->ref_count, 1);
284 }
285 
286 
287 static struct file_descriptor*
288 get_fd_locked(struct io_context* context, int fd)
289 {
290 	if (fd < 0 || (uint32)fd >= context->table_size)
291 		return NULL;
292 
293 	struct file_descriptor* descriptor = context->fds[fd];
294 
295 	if (descriptor != NULL) {
296 		TFD(GetFD(context, fd, descriptor));
297 		inc_fd_ref_count(descriptor);
298 	}
299 
300 	return descriptor;
301 }
302 
303 
304 struct file_descriptor*
305 get_fd(struct io_context* context, int fd)
306 {
307 	MutexLocker _(context->io_mutex);
308 
309 	return get_fd_locked(context, fd);
310 }
311 
312 
313 struct file_descriptor*
314 get_open_fd(struct io_context* context, int fd)
315 {
316 	MutexLocker _(context->io_mutex);
317 
318 	file_descriptor* descriptor = get_fd_locked(context, fd);
319 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
320 		return NULL;
321 
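	// besides the reference acquired by get_fd_locked(), take an open
	// reference, so the descriptor's close hook won't be invoked while the
	// caller is using it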
322 	atomic_add(&descriptor->open_count, 1);
323 
324 	return descriptor;
325 }
326 
327 
328 /*!	Removes the file descriptor from the specified slot.
329 */
330 static struct file_descriptor*
331 remove_fd(struct io_context* context, int fd)
332 {
333 	struct file_descriptor* descriptor = NULL;
334 
335 	if (fd < 0)
336 		return NULL;
337 
338 	mutex_lock(&context->io_mutex);
339 
340 	if ((uint32)fd < context->table_size)
341 		descriptor = context->fds[fd];
342 
343 	select_info* selectInfos = NULL;
344 	bool disconnected = false;
345 
346 	if (descriptor != NULL) {
347 		// fd is valid
348 		TFD(RemoveFD(context, fd, descriptor));
349 
350 		context->fds[fd] = NULL;
351 		fd_set_close_on_exec(context, fd, false);
352 		context->num_used_fds--;
353 
354 		selectInfos = context->select_infos[fd];
355 		context->select_infos[fd] = NULL;
356 
357 		disconnected = (descriptor->open_mode & O_DISCONNECTED) != 0;
358 	}
359 
360 	mutex_unlock(&context->io_mutex);
361 
362 	if (selectInfos != NULL)
363 		deselect_select_infos(descriptor, selectInfos, true);
364 
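	// a disconnected descriptor is reported to the caller as if the slot had
	// been empty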
365 	return disconnected ? NULL : descriptor;
366 }
367 
368 
369 static int
370 dup_fd(int fd, bool kernel)
371 {
372 	struct io_context* context = get_current_io_context(kernel);
373 	struct file_descriptor* descriptor;
374 	int status;
375 
376 	TRACE(("dup_fd: fd = %d\n", fd));
377 
378 	// Try to get the fd structure
379 	descriptor = get_fd(context, fd);
380 	if (descriptor == NULL)
381 		return B_FILE_ERROR;
382 
383 	// now put the fd in place
384 	status = new_fd(context, descriptor);
385 	if (status < 0)
386 		put_fd(descriptor);
387 	else {
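		// a duplicated FD does not inherit the close-on-exec flag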
388 		mutex_lock(&context->io_mutex);
389 		fd_set_close_on_exec(context, status, false);
390 		mutex_unlock(&context->io_mutex);
391 	}
392 
393 	return status;
394 }
395 
396 
397 /*!	POSIX says this should be the same as:
398 		close(newfd);
399 		fcntl(oldfd, F_DUPFD, newfd);
400 
401 	We do dup2() directly to be thread-safe.
402 */
403 static int
404 dup2_fd(int oldfd, int newfd, bool kernel)
405 {
406 	struct file_descriptor* evicted = NULL;
407 	struct io_context* context;
408 
409 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
410 
411 	// quick check
412 	if (oldfd < 0 || newfd < 0)
413 		return B_FILE_ERROR;
414 
415 	// Get current I/O context and lock it
416 	context = get_current_io_context(kernel);
417 	mutex_lock(&context->io_mutex);
418 
419 	// Check if the fds are valid (the mutex must be held because
420 	// the table size could change)
421 	if ((uint32)oldfd >= context->table_size
422 		|| (uint32)newfd >= context->table_size
423 		|| context->fds[oldfd] == NULL
424 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
425 		mutex_unlock(&context->io_mutex);
426 		return B_FILE_ERROR;
427 	}
428 
429 	// Check for identity; note that this check cannot be done earlier,
430 	// because we always want to return an error for invalid
431 	// handles
432 	select_info* selectInfos = NULL;
433 	if (oldfd != newfd) {
434 		// Now do the work
435 		TFD(Dup2FD(context, oldfd, newfd));
436 
437 		evicted = context->fds[newfd];
438 		selectInfos = context->select_infos[newfd];
439 		context->select_infos[newfd] = NULL;
440 		atomic_add(&context->fds[oldfd]->ref_count, 1);
441 		atomic_add(&context->fds[oldfd]->open_count, 1);
442 		context->fds[newfd] = context->fds[oldfd];
443 
444 		if (evicted == NULL)
445 			context->num_used_fds++;
446 	}
447 
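	// as POSIX specifies for dup2(), the duplicate starts with the
	// close-on-exec flag cleared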
448 	fd_set_close_on_exec(context, newfd, false);
449 
450 	mutex_unlock(&context->io_mutex);
451 
452 	// Say bye bye to the evicted fd
453 	if (evicted) {
454 		deselect_select_infos(evicted, selectInfos, true);
455 		close_fd(evicted);
456 		put_fd(evicted);
457 	}
458 
459 	return newfd;
460 }
461 
462 
463 /*!	Duplicates an FD from another team into the current or the kernel team.
464 	\param fromTeam The team which owns the FD.
465 	\param fd The FD to duplicate.
466 	\param kernel If \c true, the new FD will be created in the kernel team,
467 			otherwise in the current userland team.
468 	\return The newly created FD or an error code, if something went wrong.
469 */
470 int
471 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
472 {
473 	// get the I/O context for the team in question
474 	Team* team = Team::Get(fromTeam);
475 	if (team == NULL)
476 		return B_BAD_TEAM_ID;
477 	BReference<Team> teamReference(team, true);
478 
479 	io_context* fromContext = team->io_context;
480 
481 	// get the file descriptor
482 	file_descriptor* descriptor = get_fd(fromContext, fd);
483 	if (descriptor == NULL)
484 		return B_FILE_ERROR;
485 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
486 
487 	// create a new FD in the target I/O context
488 	int result = new_fd(get_current_io_context(kernel), descriptor);
489 	if (result >= 0) {
490 		// the descriptor reference now belongs to the slot
491 		descriptorPutter.Detach();
492 	}
493 
494 	return result;
495 }
496 
497 
498 static status_t
499 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
500 {
501 	struct file_descriptor* descriptor;
502 	status_t status;
503 
504 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
505 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
506 		return B_FILE_ERROR;
507 
508 	if (descriptor->ops->fd_ioctl != NULL)
509 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
510 	else
511 		status = B_DEV_INVALID_IOCTL;
512 
513 	if (status == B_DEV_INVALID_IOCTL)
514 		status = ENOTTY;
515 
516 	put_fd(descriptor);
517 	return status;
518 }
519 
520 
521 static void
522 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
523 	bool putSyncObjects)
524 {
525 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
526 
527 	select_info* info = infos;
528 	while (info != NULL) {
529 		select_sync* sync = info->sync;
530 
531 		// deselect the selected events
532 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
533 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
534 			for (uint16 event = 1; event < 16; event++) {
535 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
536 					descriptor->ops->fd_deselect(descriptor, event,
537 						(selectsync*)info);
538 				}
539 			}
540 		}
541 
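		// wake any waiter, flagging this descriptor as no longer valid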
542 		notify_select_events(info, B_EVENT_INVALID);
543 		info = info->next;
544 
545 		if (putSyncObjects)
546 			put_select_sync(sync);
547 	}
548 }
549 
550 
551 status_t
552 select_fd(int32 fd, struct select_info* info, bool kernel)
553 {
554 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
555 		info->sync, info->selected_events));
556 
557 	FDGetter fdGetter;
558 		// defined before the context locker, so it will be destroyed after it
559 
560 	io_context* context = get_current_io_context(kernel);
561 	MutexLocker locker(context->io_mutex);
562 
563 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
564 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
565 		return B_FILE_ERROR;
566 
567 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
568 
569 	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
570 		// if the I/O subsystem doesn't support select(), we will
571 		// immediately notify the select call
572 		return notify_select_events(info, eventsToSelect);
573 	}
574 
575 	// We need the FD to stay open while we're doing this, so that select()/
576 	// deselect() won't be called on it after it has been closed.
577 	atomic_add(&descriptor->open_count, 1);
578 
579 	locker.Unlock();
580 
581 	// select any events asked for
582 	uint32 selectedEvents = 0;
583 
584 	for (uint16 event = 1; event < 16; event++) {
585 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
586 			&& descriptor->ops->fd_select(descriptor, event,
587 				(selectsync*)info) == B_OK) {
588 			selectedEvents |= SELECT_FLAG(event);
589 		}
590 	}
591 	info->selected_events = selectedEvents
592 		| (info->selected_events & B_EVENT_INVALID);
593 
594 	// Add the info to the IO context. Even if nothing has been selected -- we
595 	// always support B_EVENT_INVALID.
596 	locker.Lock();
597 	if (context->fds[fd] != descriptor) {
598 		// Someone close()d the index in the meantime. deselect() all
599 		// events.
600 		info->next = NULL;
601 		deselect_select_infos(descriptor, info, false);
602 
603 		// Release our open reference of the descriptor.
604 		close_fd(descriptor);
605 		return B_FILE_ERROR;
606 	}
607 
608 	// The FD index hasn't changed, so we add the select info to the table.
609 
610 	info->next = context->select_infos[fd];
611 	context->select_infos[fd] = info;
612 
613 	// As long as the info is in the list, we keep a reference to the sync
614 	// object.
615 	atomic_add(&info->sync->ref_count, 1);
616 
617 	// Finally release our open reference. It is safe just to decrement,
618 	// since as long as the descriptor is associated with the slot,
619 	// someone else still has it open.
620 	atomic_add(&descriptor->open_count, -1);
621 
622 	return B_OK;
623 }
624 
625 
626 status_t
627 deselect_fd(int32 fd, struct select_info* info, bool kernel)
628 {
629 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
630 		info->sync, info->selected_events));
631 
632 	FDGetter fdGetter;
633 		// defined before the context locker, so it will be destroyed after it
634 
635 	io_context* context = get_current_io_context(kernel);
636 	MutexLocker locker(context->io_mutex);
637 
638 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
639 	if (descriptor == NULL)
640 		return B_FILE_ERROR;
641 
642 	// remove the info from the IO context
643 
644 	select_info** infoLocation = &context->select_infos[fd];
645 	while (*infoLocation != NULL && *infoLocation != info)
646 		infoLocation = &(*infoLocation)->next;
647 
648 	// If not found, someone else beat us to it.
649 	if (*infoLocation != info)
650 		return B_OK;
651 
652 	*infoLocation = info->next;
653 
654 	locker.Unlock();
655 
656 	// deselect the selected events
657 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
658 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
659 		for (uint16 event = 1; event < 16; event++) {
660 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
661 				descriptor->ops->fd_deselect(descriptor, event,
662 					(selectsync*)info);
663 			}
664 		}
665 	}
666 
667 	put_select_sync(info->sync);
668 
669 	return B_OK;
670 }
671 
672 
673 /*!	This function checks if the specified fd is valid in the current
674 	context. It can be used for a quick check; the fd is not locked
675 	so it could become invalid immediately after this check.
676 */
677 bool
678 fd_is_valid(int fd, bool kernel)
679 {
680 	struct file_descriptor* descriptor
681 		= get_fd(get_current_io_context(kernel), fd);
682 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
683 		return false;
684 
685 	put_fd(descriptor);
686 	return true;
687 }
688 
689 
690 struct vnode*
691 fd_vnode(struct file_descriptor* descriptor)
692 {
693 	switch (descriptor->type) {
694 		case FDTYPE_FILE:
695 		case FDTYPE_DIR:
696 		case FDTYPE_ATTR_DIR:
697 		case FDTYPE_ATTR:
698 			return descriptor->u.vnode;
699 	}
700 
701 	return NULL;
702 }
703 
704 
705 static status_t
706 common_close(int fd, bool kernel)
707 {
708 	return close_fd_index(get_current_io_context(kernel), fd);
709 }
710 
711 
712 static ssize_t
713 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
714 {
715 	if (!IS_USER_ADDRESS(buffer))
716 		return B_BAD_ADDRESS;
717 
718 	if (pos < -1)
719 		return B_BAD_VALUE;
720 
721 	FDGetter fdGetter;
722 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
723 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
724 		return B_FILE_ERROR;
725 
726 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
727 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
728 		return B_FILE_ERROR;
729 	}
730 
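	// a position of -1 means: use the descriptor's current position and
	// advance it afterwards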
731 	bool movePosition = false;
732 	if (pos == -1) {
733 		pos = descriptor->pos;
734 		movePosition = true;
735 	}
736 
737 	if (write ? descriptor->ops->fd_write == NULL
738 			: descriptor->ops->fd_read == NULL) {
739 		return B_BAD_VALUE;
740 	}
741 
742 	SyscallRestartWrapper<status_t> status;
743 
744 	if (write)
745 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
746 	else
747 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
748 
749 	if (status != B_OK)
750 		return status;
751 
752 	if (movePosition)
753 		descriptor->pos = pos + length;
754 
755 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
756 }
757 
758 
759 static ssize_t
760 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
761 	bool write)
762 {
763 	if (!IS_USER_ADDRESS(userVecs))
764 		return B_BAD_ADDRESS;
765 
766 	if (pos < -1)
767 		return B_BAD_VALUE;
768 
769 	// prevent integer overflow exploit in malloc()
770 	if (count > IOV_MAX)
771 		return B_BAD_VALUE;
772 
773 	FDGetter fdGetter;
774 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
775 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
776 		return B_FILE_ERROR;
777 
778 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
779 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
780 		return B_FILE_ERROR;
781 	}
782 
783 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
784 	if (vecs == NULL)
785 		return B_NO_MEMORY;
786 	MemoryDeleter _(vecs);
787 
788 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
789 		return B_BAD_ADDRESS;
790 
791 	bool movePosition = false;
792 	if (pos == -1) {
793 		pos = descriptor->pos;
794 		movePosition = true;
795 	}
796 
797 	if (write ? descriptor->ops->fd_write == NULL
798 			: descriptor->ops->fd_read == NULL) {
799 		return B_BAD_VALUE;
800 	}
801 
802 	SyscallRestartWrapper<status_t> status;
803 
804 	ssize_t bytesTransferred = 0;
805 	for (uint32 i = 0; i < count; i++) {
806 		size_t length = vecs[i].iov_len;
807 		if (write) {
808 			status = descriptor->ops->fd_write(descriptor, pos,
809 				vecs[i].iov_base, &length);
810 		} else {
811 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
812 				&length);
813 		}
814 
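		// on error, report it only if nothing has been transferred yet;
		// otherwise return the short count accumulated so far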
815 		if (status != B_OK) {
816 			if (bytesTransferred == 0)
817 				return status;
818 			status = B_OK;
819 			break;
820 		}
821 
822 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
823 			bytesTransferred = SSIZE_MAX;
824 		else
825 			bytesTransferred += (ssize_t)length;
826 
827 		pos += length;
828 
829 		if (length < vecs[i].iov_len)
830 			break;
831 	}
832 
833 	if (movePosition)
834 		descriptor->pos = pos;
835 
836 	return bytesTransferred;
837 }
838 
839 
840 status_t
841 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
842 {
843 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
844 
845 	return fd_ioctl(false, fd, op, buffer, length);
846 }
847 
848 
849 //	#pragma mark - User syscalls
850 
851 
852 ssize_t
853 _user_read(int fd, off_t pos, void* buffer, size_t length)
854 {
855 	return common_user_io(fd, pos, buffer, length, false);
856 }
857 
858 
859 ssize_t
860 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
861 {
862 	return common_user_vector_io(fd, pos, userVecs, count, false);
863 }
864 
865 
866 ssize_t
867 _user_write(int fd, off_t pos, const void* buffer, size_t length)
868 {
869 	return common_user_io(fd, pos, (void*)buffer, length, true);
870 }
871 
872 
873 ssize_t
874 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
875 {
876 	return common_user_vector_io(fd, pos, userVecs, count, true);
877 }
878 
879 
880 off_t
881 _user_seek(int fd, off_t pos, int seekType)
882 {
883 	syscall_64_bit_return_value();
884 
885 	struct file_descriptor* descriptor;
886 
887 	descriptor = get_fd(get_current_io_context(false), fd);
888 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
889 		return B_FILE_ERROR;
890 
891 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
892 
893 	if (descriptor->ops->fd_seek != NULL)
894 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
895 	else
896 		pos = ESPIPE;
897 
898 	put_fd(descriptor);
899 	return pos;
900 }
901 
902 
903 status_t
904 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
905 {
906 	if (!IS_USER_ADDRESS(buffer))
907 		return B_BAD_ADDRESS;
908 
909 	TRACE(("user_ioctl: fd %d\n", fd));
910 
911 	SyscallRestartWrapper<status_t> status;
912 
913 	return status = fd_ioctl(false, fd, op, buffer, length);
914 }
915 
916 
917 ssize_t
918 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
919 	uint32 maxCount)
920 {
921 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
922 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
923 
924 	if (maxCount == 0)
925 		return 0;
926 
927 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
928 		return B_BAD_ADDRESS;
929 
930 	// get I/O context and FD
931 	io_context* ioContext = get_current_io_context(false);
932 	FDGetter fdGetter;
933 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
934 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
935 		return B_FILE_ERROR;
936 
937 	if (descriptor->ops->fd_read_dir == NULL)
938 		return B_UNSUPPORTED;
939 
940 	// restrict buffer size and allocate a heap buffer
941 	if (bufferSize > kMaxReadDirBufferSize)
942 		bufferSize = kMaxReadDirBufferSize;
943 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
944 	if (buffer == NULL)
945 		return B_NO_MEMORY;
946 	MemoryDeleter bufferDeleter(buffer);
947 
948 	// read the directory
949 	uint32 count = maxCount;
950 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
951 		buffer, bufferSize, &count);
952 	if (status != B_OK)
953 		return status;
954 
955 	// copy the buffer back -- determine the total buffer size first
956 	size_t sizeToCopy = 0;
957 	struct dirent* entry = buffer;
958 	for (uint32 i = 0; i < count; i++) {
959 		size_t length = entry->d_reclen;
960 		sizeToCopy += length;
961 		entry = (struct dirent*)((uint8*)entry + length);
962 	}
963 
964 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
965 		return B_BAD_ADDRESS;
966 
967 	return count;
968 }
969 
970 
971 status_t
972 _user_rewind_dir(int fd)
973 {
974 	struct file_descriptor* descriptor;
975 	status_t status;
976 
977 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
978 
979 	descriptor = get_fd(get_current_io_context(false), fd);
980 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
981 		return B_FILE_ERROR;
982 
983 	if (descriptor->ops->fd_rewind_dir != NULL)
984 		status = descriptor->ops->fd_rewind_dir(descriptor);
985 	else
986 		status = B_UNSUPPORTED;
987 
988 	put_fd(descriptor);
989 	return status;
990 }
991 
992 
993 status_t
994 _user_close(int fd)
995 {
996 	return common_close(fd, false);
997 }
998 
999 
1000 int
1001 _user_dup(int fd)
1002 {
1003 	return dup_fd(fd, false);
1004 }
1005 
1006 
1007 int
1008 _user_dup2(int ofd, int nfd)
1009 {
1010 	return dup2_fd(ofd, nfd, false);
1011 }
1012 
1013 
1014 //	#pragma mark - Kernel calls
1015 
1016 
1017 ssize_t
1018 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1019 {
1020 	if (pos < -1)
1021 		return B_BAD_VALUE;
1022 
1023 	FDGetter fdGetter;
1024 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1025 
1026 	if (!descriptor)
1027 		return B_FILE_ERROR;
1028 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1029 		return B_FILE_ERROR;
1030 
1031 	bool movePosition = false;
1032 	if (pos == -1) {
1033 		pos = descriptor->pos;
1034 		movePosition = true;
1035 	}
1036 
1037 	SyscallFlagUnsetter _;
1038 
1039 	if (descriptor->ops->fd_read == NULL)
1040 		return B_BAD_VALUE;
1041 
1042 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1043 		&length);
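	// fd_read() returns a status code; on success the number of bytes
	// actually read is reported via length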
1044 	if (bytesRead >= B_OK) {
1045 		if (length > SSIZE_MAX)
1046 			bytesRead = SSIZE_MAX;
1047 		else
1048 			bytesRead = (ssize_t)length;
1049 
1050 		if (movePosition)
1051 			descriptor->pos = pos + length;
1052 	}
1053 
1054 	return bytesRead;
1055 }
1056 
1057 
1058 ssize_t
1059 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1060 {
1061 	bool movePosition = false;
1062 	status_t status;
1063 	uint32 i;
1064 
1065 	if (pos < -1)
1066 		return B_BAD_VALUE;
1067 
1068 	FDGetter fdGetter;
1069 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1070 
1071 	if (!descriptor)
1072 		return B_FILE_ERROR;
1073 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1074 		return B_FILE_ERROR;
1075 
1076 	if (pos == -1) {
1077 		pos = descriptor->pos;
1078 		movePosition = true;
1079 	}
1080 
1081 	if (descriptor->ops->fd_read == NULL)
1082 		return B_BAD_VALUE;
1083 
1084 	SyscallFlagUnsetter _;
1085 
1086 	ssize_t bytesRead = 0;
1087 
1088 	for (i = 0; i < count; i++) {
1089 		size_t length = vecs[i].iov_len;
1090 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1091 			&length);
1092 		if (status != B_OK) {
1093 			bytesRead = status;
1094 			break;
1095 		}
1096 
1097 		if ((uint64)bytesRead + length > SSIZE_MAX)
1098 			bytesRead = SSIZE_MAX;
1099 		else
1100 			bytesRead += (ssize_t)length;
1101 
1102 		pos += vecs[i].iov_len;
1103 	}
1104 
1105 	if (movePosition)
1106 		descriptor->pos = pos;
1107 
1108 	return bytesRead;
1109 }
1110 
1111 
1112 ssize_t
1113 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1114 {
1115 	if (pos < -1)
1116 		return B_BAD_VALUE;
1117 
1118 	FDGetter fdGetter;
1119 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1120 
1121 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1122 		return B_FILE_ERROR;
1123 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1124 		return B_FILE_ERROR;
1125 
1126 	bool movePosition = false;
1127 	if (pos == -1) {
1128 		pos = descriptor->pos;
1129 		movePosition = true;
1130 	}
1131 
1132 	if (descriptor->ops->fd_write == NULL)
1133 		return B_BAD_VALUE;
1134 
1135 	SyscallFlagUnsetter _;
1136 
1137 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1138 		&length);
1139 	if (bytesWritten >= B_OK) {
1140 		if (length > SSIZE_MAX)
1141 			bytesWritten = SSIZE_MAX;
1142 		else
1143 			bytesWritten = (ssize_t)length;
1144 
1145 		if (movePosition)
1146 			descriptor->pos = pos + length;
1147 	}
1148 
1149 	return bytesWritten;
1150 }
1151 
1152 
1153 ssize_t
1154 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1155 {
1156 	bool movePosition = false;
1157 	status_t status;
1158 	uint32 i;
1159 
1160 	if (pos < -1)
1161 		return B_BAD_VALUE;
1162 
1163 	FDGetter fdGetter;
1164 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1165 
1166 	if (!descriptor)
1167 		return B_FILE_ERROR;
1168 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1169 		return B_FILE_ERROR;
1170 
1171 	if (pos == -1) {
1172 		pos = descriptor->pos;
1173 		movePosition = true;
1174 	}
1175 
1176 	if (descriptor->ops->fd_write == NULL)
1177 		return B_BAD_VALUE;
1178 
1179 	SyscallFlagUnsetter _;
1180 
1181 	ssize_t bytesWritten = 0;
1182 
1183 	for (i = 0; i < count; i++) {
1184 		size_t length = vecs[i].iov_len;
1185 		status = descriptor->ops->fd_write(descriptor, pos,
1186 			vecs[i].iov_base, &length);
1187 		if (status != B_OK) {
1188 			bytesWritten = status;
1189 			break;
1190 		}
1191 
1192 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1193 			bytesWritten = SSIZE_MAX;
1194 		else
1195 			bytesWritten += (ssize_t)length;
1196 
1197 		pos += vecs[i].iov_len;
1198 	}
1199 
1200 	if (movePosition)
1201 		descriptor->pos = pos;
1202 
1203 	return bytesWritten;
1204 }
1205 
1206 
1207 off_t
1208 _kern_seek(int fd, off_t pos, int seekType)
1209 {
1210 	struct file_descriptor* descriptor;
1211 
1212 	descriptor = get_fd(get_current_io_context(true), fd);
1213 	if (!descriptor)
1214 		return B_FILE_ERROR;
1215 
1216 	if (descriptor->ops->fd_seek)
1217 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1218 	else
1219 		pos = ESPIPE;
1220 
1221 	put_fd(descriptor);
1222 	return pos;
1223 }
1224 
1225 
1226 status_t
1227 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1228 {
1229 	TRACE(("kern_ioctl: fd %d\n", fd));
1230 
1231 	SyscallFlagUnsetter _;
1232 
1233 	return fd_ioctl(true, fd, op, buffer, length);
1234 }
1235 
1236 
1237 ssize_t
1238 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1239 	uint32 maxCount)
1240 {
1241 	struct file_descriptor* descriptor;
1242 	ssize_t retval;
1243 
1244 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1245 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1246 
1247 	struct io_context* ioContext = get_current_io_context(true);
1248 	descriptor = get_fd(ioContext, fd);
1249 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1250 		return B_FILE_ERROR;
1251 
1252 	if (descriptor->ops->fd_read_dir) {
1253 		uint32 count = maxCount;
1254 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1255 			bufferSize, &count);
1256 		if (retval >= 0)
1257 			retval = count;
1258 	} else
1259 		retval = B_UNSUPPORTED;
1260 
1261 	put_fd(descriptor);
1262 	return retval;
1263 }
1264 
1265 
1266 status_t
1267 _kern_rewind_dir(int fd)
1268 {
1269 	struct file_descriptor* descriptor;
1270 	status_t status;
1271 
1272 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1273 
1274 	descriptor = get_fd(get_current_io_context(true), fd);
1275 	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1276 		return B_FILE_ERROR;
1277 
1278 	if (descriptor->ops->fd_rewind_dir)
1279 		status = descriptor->ops->fd_rewind_dir(descriptor);
1280 	else
1281 		status = B_UNSUPPORTED;
1282 
1283 	put_fd(descriptor);
1284 	return status;
1285 }
1286 
1287 
1288 status_t
1289 _kern_close(int fd)
1290 {
1291 	return common_close(fd, true);
1292 }
1293 
1294 
1295 int
1296 _kern_dup(int fd)
1297 {
1298 	return dup_fd(fd, true);
1299 }
1300 
1301 
1302 int
1303 _kern_dup2(int ofd, int nfd)
1304 {
1305 	return dup2_fd(ofd, nfd, true);
1306 }
1307 
1308