xref: /haiku/src/system/kernel/fs/fd.cpp (revision f7c507c3a6fbf3a44c59500543926a9088724968)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>

#include <OS.h>

#include <AutoDeleter.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


struct FDGetterLocking {
	inline bool Lock(file_descriptor* /*lockable*/)
	{
		return false;
	}

	inline void Unlock(file_descriptor* lockable)
	{
		put_fd(lockable);
	}
};

class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
public:
	inline FDGetter()
		: AutoLocker<file_descriptor, FDGetterLocking>()
	{
	}

	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
		: AutoLocker<file_descriptor, FDGetterLocking>(
			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
	{
	}

	inline file_descriptor* SetTo(io_context* context, int fd,
		bool contextLocked = false)
	{
		file_descriptor* descriptor
			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
		return descriptor;
	}

	inline file_descriptor* SetTo(int fd, bool kernel,
		bool contextLocked = false)
	{
		return SetTo(get_current_io_context(kernel), fd, contextLocked);
	}

	inline file_descriptor* FD() const
	{
		return fLockable;
	}
};
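
// Usage sketch (illustrative, not part of the original file): FDGetter is a
// small RAII helper, so a typical caller looks up an FD and relies on the
// getter to put_fd() the reference on scope exit. The function name below is
// hypothetical:
//
//	static status_t
//	example_fd_type(int fd, bool kernel, int32& _type)
//	{
//		FDGetter fdGetter;
//		file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
//		if (descriptor == NULL)
//			return B_FILE_ERROR;
//
//		_type = descriptor->type;
//		return B_OK;
//			// fdGetter's destructor calls put_fd(descriptor)
//	}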


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*! Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = 0;

	return descriptor;
}


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) != 0;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
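
// Worked example (follows directly from the two functions above): the
// close-on-exec flags form a bit array with one bit per FD. For fd == 10,
// fd / 8 == 1 picks the second byte and fd & 7 == 2 picks bit 2 in it, so
//
//	fd_set_close_on_exec(context, 10, true);
//
// is equivalent to context->fds_close_on_exec[1] |= (1 << 2), and
// fd_close_on_exec(context, 10) then returns true.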


/*!	Searches for a free slot in the FD table of the provided I/O context, and
	inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}
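
// Allocation pattern (a sketch of how callers such as the VFS use these
// routines; the exact cleanup on failure varies by subsystem):
//
//	file_descriptor* descriptor = alloc_fd();
//	if (descriptor == NULL)
//		return B_NO_MEMORY;
//	// ... set descriptor->type, ops, cookie, open_mode ...
//
//	int fd = new_fd(get_current_io_context(kernel), descriptor);
//	if (fd < 0) {
//		descriptor->ops = NULL;
//			// keep put_fd() from running the free hook on an object
//			// that was never fully published
//		put_fd(descriptor);
//			// drops the initial reference from alloc_fd()
//	}
//	return fd;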


/*!	Decrements the descriptor's reference count, and frees all of its
	resources once it is no longer in use.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		free(descriptor);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}
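
// Reference-count lifecycle (illustrative): alloc_fd() creates the descriptor
// with ref_count == 1, the table slot filled by new_fd() effectively owns
// that reference, and every get_fd() adds a temporary one that must be
// balanced by put_fd():
//
//	file_descriptor* descriptor = get_fd(context, fd);	// ref_count + 1
//	if (descriptor != NULL) {
//		// ... use descriptor ...
//		put_fd(descriptor);	// ref_count - 1; frees it if this was the last
//	}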


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (descriptor->type == FDTYPE_FILE)
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd() - get_fd() will no longer
	succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume is removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}
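
// Illustrative use, loosely modeled on the VFS unmount path (simplified; the
// real code in vfs.cpp does more work, e.g. closing and deselecting the
// affected descriptors). goneVnode stands in for a vnode of the disappearing
// volume:
//
//	MutexLocker locker(context->io_mutex);
//	for (uint32 slot = 0; slot < context->table_size; slot++) {
//		file_descriptor* descriptor = context->fds[slot];
//		if (descriptor != NULL && fd_vnode(descriptor) == goneVnode)
//			disconnect_fd(descriptor);
//	}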


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED) != 0;
	}

	mutex_unlock(&context->io_mutex);

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; note that this check cannot be done earlier,
	// because we always want to return an error for invalid
	// descriptors
	select_info* selectInfos = NULL;
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		deselect_select_infos(evicted, selectInfos, true);
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}
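
// Behavioral example (per the implementation above), assuming fds 3 and 5 are
// both open in the calling team:
//
//	dup2_fd(3, 5, false);	// closes the old fd 5, then makes slot 5 share
//				// fd 3's descriptor; returns 5
//	dup2_fd(3, 3, false);	// identity: only clears the close-on-exec flag
//				// and returns 3
//	dup2_fd(3, -1, false);	// returns B_FILE_ERROR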


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	struct file_descriptor* descriptor;
	int status;

	descriptor = get_fd(get_current_io_context(kernelFD), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_ioctl != NULL)
		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	put_fd(descriptor);
	return status;
}


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		notify_select_events(info, B_EVENT_INVALID);
		info = info->next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			return notify_select_events(info, eventsToSelect);
		else
			return B_OK;
	}

	// We need the FD to stay open while we're doing this, so no select()/
	// deselect() will be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor, event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor, info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor);
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	atomic_add(&info->sync->ref_count, 1);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}
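
// Caller sketch (illustrative; the real consumers live in
// wait_for_objects.cpp): a select() implementation prepares one select_info
// per FD, all pointing at a shared, reference-counted select_sync, registers
// them, waits for notify_select_events(), and always deregisters afterwards:
//
//	info->selected_events = B_EVENT_READ | B_EVENT_INVALID;
//	info->next = NULL;
//	status_t status = select_fd(fd, info, kernel);
//	if (status == B_OK) {
//		// ... block until notify_select_events() fires or a timeout ...
//		deselect_fd(fd, info, kernel);
//	}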


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor, event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return false;

	put_fd(descriptor);
	return true;
}


struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition)
		descriptor->pos = pos + length;

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (!IS_USER_ADDRESS(userVecs))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	// prevent integer overflow exploit in malloc()
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
	if (vecs == NULL)
		return B_NO_MEMORY;
	MemoryDeleter _(vecs);

	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
		return B_BAD_ADDRESS;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (uint32 i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor, pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesTransferred;
}
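
// Semantics example (follows from the loop above), as seen from userland via
// the POSIX readv() wrapper; bufA/bufB are hypothetical 100-byte buffers:
//
//	struct iovec vecs[2] = { { bufA, 100 }, { bufB, 100 } };
//	ssize_t transferred = readv(fd, vecs, 2);
//
// If the first fd_read() transfers all 100 bytes but the second one fails,
// the call still returns 100 rather than an error, and a pos == -1 caller
// sees descriptor->pos advanced by those 100 bytes. An error code is only
// returned when nothing was transferred at all.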


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek != NULL)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	TRACE(("user_ioctl: fd %d\n", fd));

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%lu)\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	struct dirent* entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry = (struct dirent*)((uint8*)entry + length);
	}

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}
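
// Consumer sketch (illustrative): the entries copied out above are
// variable-length records, so a reader has to advance by d_reclen rather
// than by sizeof(struct dirent); process() is a hypothetical callback:
//
//	struct dirent* entry = (struct dirent*)buffer;
//	for (ssize_t i = 0; i < count; i++) {
//		process(entry->d_name);
//		entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
//	}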


status_t
_user_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir != NULL)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}
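
// Usage sketch (illustrative): kernel code reads through the same fd_read
// hook as userland, e.g. to fetch a header from an FD it opened itself:
//
//	char header[512];
//	ssize_t bytesRead = _kern_read(fd, 0, header, sizeof(header));
//	if (bytesRead < 0)
//		return bytesRead;	// negative values are error codes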


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
			&length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
		&length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor, pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	struct file_descriptor* descriptor;
	ssize_t retval;

	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%lu)\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	descriptor = get_fd(ioContext, fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	put_fd(descriptor);
	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}