xref: /haiku/src/system/kernel/fs/fd.cpp (revision 5c1e072463878d1d30d9ecb9842e6d461132306e)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>

#include <OS.h>

#include <AutoDeleter.h>
#include <BytePointer.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


struct FDGetterLocking {
	inline bool Lock(file_descriptor* /*lockable*/)
	{
		return false;
	}

	inline void Unlock(file_descriptor* lockable)
	{
		put_fd(lockable);
	}
};

class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
public:
	inline FDGetter()
		: AutoLocker<file_descriptor, FDGetterLocking>()
	{
	}

	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
		: AutoLocker<file_descriptor, FDGetterLocking>(
			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
	{
	}

	inline file_descriptor* SetTo(io_context* context, int fd,
		bool contextLocked = false)
	{
		file_descriptor* descriptor
			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
		return descriptor;
	}

	inline file_descriptor* SetTo(int fd, bool kernel,
		bool contextLocked = false)
	{
		return SetTo(get_current_io_context(kernel), fd, contextLocked);
	}

	inline file_descriptor* FD() const
	{
		return fLockable;
	}
};
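
/* FDGetter is a scope-based helper: it resolves an FD index to a
   file_descriptor reference and calls put_fd() automatically when it goes
   out of scope. A minimal usage sketch (hypothetical caller, not part of
   this file), mirroring how the I/O paths below use it:

	static status_t
	example_fd_user(int fd, bool kernel)
	{
		FDGetter fdGetter;
		file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		// ... use descriptor; the reference is put when fdGetter
		// is destroyed ...
		return B_OK;
	}
*/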


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*! Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = 0;

	return descriptor;
}


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
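
/* The close-on-exec flags are stored as a bitmap, one bit per FD: byte
   fd / 8 holds the bit for fd, at bit position fd & 7. For example,
   setting the flag for fd 10 sets bit 2 of byte 1:

	fd_set_close_on_exec(context, 10, true);
		// fds_close_on_exec[1] |= (1 << 2)
*/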


/*!	Searches for a free slot in the FD table of the provided I/O context,
	and inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
		return B_BAD_VALUE;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}
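
/* Typical pattern for publishing a new FD (hypothetical sketch; real
   callers such as the VFS open paths also fill in descriptor->type,
   descriptor->ops and descriptor->cookie before handing the descriptor
   to new_fd()):

	file_descriptor* descriptor = alloc_fd();
	if (descriptor == NULL)
		return B_NO_MEMORY;
	// ... set descriptor->type, descriptor->ops, descriptor->cookie ...
	int fd = new_fd(get_current_io_context(kernel), descriptor);
	if (fd < 0)
		put_fd(descriptor);
	return fd;
*/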


/*!	Decrements the descriptor's reference count, and frees all of its
	resources when it is no longer in use.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		free(descriptor);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}
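
/* Note on the two counters: ref_count tracks references to the descriptor
   structure itself (one per FD-table slot, plus one per in-flight
   get_fd()), while open_count tracks how many FD slots keep the underlying
   object open (dup()'ed FDs share one descriptor with open_count > 1).
   A rough sketch of the common get/put pairing:

	file_descriptor* descriptor = get_fd(context, fd);
		// ref_count++
	// ... use descriptor->ops ...
	put_fd(descriptor);
		// ref_count--; the descriptor is freed when it drops to zero
*/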


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (descriptor->type == FDTYPE_FILE)
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	Marks the descriptor as disconnected: its underlying object will be
	closed and freed as soon as possible (in one of the next calls to
	put_fd()), and get_fd() will no longer succeed on it.
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume was removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}
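
/* A hypothetical usage sketch: when a volume disappears, the VFS can walk
   the descriptors that still point at it and disconnect each one, so later
   accesses fail cleanly instead of touching freed state:

	disconnect_fd(descriptor);
	// from now on, get_fd() returns NULL for this descriptor, and the
	// last put_fd() invokes fd_close/fd_free
*/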


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			return NULL;

		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	mutex_unlock(&context->io_mutex);

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; this cannot be done above, because we always
	// want to return an error for invalid handles
	select_info* selectInfos = NULL;
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		deselect_select_infos(evicted, selectInfos, true);
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}
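
/* Unlike the close()+fcntl() sequence in the comment above, this runs
   under the context mutex, so no other thread can grab newfd in between.
   A quick illustration of the resulting semantics (hypothetical values):

	dup2_fd(3, 5, false);
		// fds[5] now shares the descriptor of fds[3]; whatever was in
		// slot 5 is closed, and slot 5's close-on-exec flag is cleared
	dup2_fd(3, 3, false);
		// identity: succeeds and returns 3 (still clears close-on-exec)
*/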


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}
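
/* A hedged usage sketch (hypothetical caller): a kernel service that wants
   to read from an FD owned by another team could import it first:

	int localFD = dup_foreign_fd(teamID, foreignFD, true);
	if (localFD < 0)
		return localFD;
	// ... _kern_read(localFD, ...) ...
	_kern_close(localFD);
*/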


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	struct file_descriptor* descriptor;
	int status;

	descriptor = get_fd(get_current_io_context(kernelFD), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	put_fd(descriptor);
	return status;
}


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		notify_select_events(info, B_EVENT_INVALID);
		info = info->next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			return notify_select_events(info, eventsToSelect);
		else
			return B_OK;
	}

	// We need the FD to stay open while we're doing this, so that no
	// select()/deselect() will be called on it after it has been closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor, event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor, info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor);
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	atomic_add(&info->sync->ref_count, 1);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}
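
/* The select machinery stores one bit per event in a 16-bit mask; event
   numbers map to bits via SELECT_FLAG(event). A minimal sketch of how a
   caller could wire this up (hypothetical and simplified; the real callers
   live in wait_for_objects.cpp):

	select_info info = {};
	info.sync = sync;	// a previously created select_sync
	info.selected_events = SELECT_FLAG(B_SELECT_READ) | B_EVENT_INVALID;
	status_t status = select_fd(fd, &info, false);
	// on an event, notify_select_events() wakes the waiting thread;
	// deselect_fd(fd, &info, false) undoes the registration
*/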


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor, event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked,
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}


struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition)
		descriptor->pos = pos + length;

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}
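
/* Position semantics shared by the read/write paths: pos == -1 means "use
   and advance the descriptor's current position" (the read(2)/write(2)
   case), while pos >= 0 is an explicit offset that leaves descriptor->pos
   untouched (the pread(2)/pwrite(2) case). For example:

	_user_read(fd, -1, buffer, size);	// reads at descriptor->pos, advances it
	_user_read(fd, 100, buffer, size);	// reads at offset 100, pos unchanged
*/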


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (!IS_USER_ADDRESS(userVecs))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	// prevent integer overflow exploit in malloc()
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
	if (vecs == NULL)
		return B_NO_MEMORY;
	MemoryDeleter _(vecs);

	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
		return B_BAD_ADDRESS;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (uint32 i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor, pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesTransferred;
}
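
/* Partial-transfer behavior: if a later vector fails after some bytes were
   already transferred, the error is swallowed and the byte count so far is
   returned instead; a short read/write on one vector also stops the loop.
   This matches the usual readv(2)/writev(2) contract of returning the
   number of bytes actually transferred.
*/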


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(false), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	if (buffer != NULL && !IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	TRACE(("user_ioctl: fd %d\n", fd));

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %" B_PRIuSIZE
		", count = %" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	BytePointer<struct dirent> entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry += length;
	}

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}
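
/* Entries in the returned buffer are variable-length and chained via
   d_reclen, which is why the copy size above is computed by walking the
   records. A consumer would walk them the same way (hypothetical sketch,
   using the kernel-side call):

	char chunk[4096];
	ssize_t count = _kern_read_dir(fd, (struct dirent*)chunk,
		sizeof(chunk), 32);
	struct dirent* entry = (struct dirent*)chunk;
	for (ssize_t i = 0; i < count; i++) {
		// entry->d_name is valid here
		entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
	}
*/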


status_t
_user_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
			&length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (descriptor == NULL)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
		&length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor, pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	struct file_descriptor* descriptor;
	ssize_t retval;

	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %" B_PRIuSIZE
		", count = %" B_PRIu32 ")\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	descriptor = get_fd(ioContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	put_fd(descriptor);
	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}
