xref: /haiku/src/system/kernel/fs/fd.cpp (revision fc7456e9b1ec38c941134ed6d01c438cf289381e)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include <OS.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <BytePointer.h>
#include <StackOrHeapArray.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <slab/Slab.h>
#include <util/AutoLock.h>
#include <util/iovec_support.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = B_PAGE_SIZE * 2;

extern object_cache* sFileDescriptorCache;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*! Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = -1;

	return descriptor;
}


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
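
/*	Example (illustrative sketch): the close-on-exec flags are kept in a
	bitmap with one bit per descriptor, so fd 11 maps to byte 11 / 8 == 1,
	bit 11 & 7 == 3:

		fd_set_close_on_exec(context, 11, true);
			// sets bit 3 (mask 0x08) in context->fds_close_on_exec[1]
		bool isSet = fd_close_on_exec(context, 11);
			// reads the same bit back; isSet is now true
*/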


/*!	Searches for a free slot in the FD table of the provided I/O context, and
	inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
		return B_BAD_VALUE;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}
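
/*	Usage sketch (the ops table "sMyOps" and "myCookie" are hypothetical):
	a subsystem typically allocates a descriptor, fills it in, and then
	publishes it via new_fd():

		file_descriptor* descriptor = alloc_fd();
		if (descriptor == NULL)
			return B_NO_MEMORY;
		descriptor->ops = &sMyOps;
		descriptor->cookie = myCookie;
		descriptor->open_mode = O_RDWR;

		int fd = new_fd(get_current_io_context(false), descriptor);
		if (fd < 0) {
			put_fd(descriptor);
				// drops the allocation reference and frees the descriptor
			return fd;
		}
*/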


/*!	Reduces the descriptor's reference counter, and frees all resources
	when it's no longer used.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		object_cache_free(sFileDescriptorCache, descriptor, 0);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}
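
/*	Note on the two counters: ref_count tracks how many pointers to the
	descriptor exist (table slots plus temporary users), while open_count
	tracks how many of them keep it open. A temporary user pairs get_fd()
	with put_fd():

		file_descriptor* descriptor = get_fd(context, fd);
			// ref_count was incremented on success
		if (descriptor != NULL) {
			// ... use descriptor->ops, descriptor->cookie ...
			put_fd(descriptor);
				// freed only if this was the last reference
		}
*/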


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (fd_is_file(descriptor))
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd() - get_fd() will no longer
	succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume was removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}
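
/*	Example (illustrative): when a mounted volume disappears, the VFS can
	disconnect all descriptors still referring to it:

		disconnect_fd(descriptor);
		// get_fd() now returns NULL for this descriptor; the underlying
		// object is closed by put_fd() once the current users are done
*/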


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			return NULL;

		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	mutex_unlock(&context->io_mutex);

	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity. Note that this check cannot be done earlier,
	// because we always want to return an error on invalid handles.
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		select_info* selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;

		deselect_select_infos(evicted, selectInfos, true);
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}
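
/*	Example (userland view): dup2_fd() backs the dup2() semantics,
	atomically replacing whatever occupies the target slot:

		int logFd = open("/tmp/log", O_WRONLY | O_CREAT, 0644);
		dup2(logFd, STDOUT_FILENO);
			// arrives here via _user_dup2(); the descriptor previously in
			// slot 1 is closed and newfd's close-on-exec flag is cleared,
			// all while holding the I/O context mutex
*/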


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	FileDescriptorPutter descriptorPutter(descriptor);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}
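
/*	Usage sketch ("sourceTeam" and "theirFd" are hypothetical): a kernel
	component handed an FD number by another team imports it into the
	kernel's I/O context before using it:

		int localFd = dup_foreign_fd(sourceTeam, theirFd, true);
		if (localFd < 0)
			return localFd;
		// ... use localFd with _kern_read()/_kern_write() ...
		_kern_close(localFd);
*/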


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernelFD), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// Special case: translate FIONBIO into fcntl(F_SETFL).
	if (op == FIONBIO) {
		if (buffer == NULL)
			return B_BAD_VALUE;

		int value;
		if (is_called_via_syscall()) {
			if (!IS_USER_ADDRESS(buffer)
				|| user_memcpy(&value, buffer, sizeof(int)) != B_OK) {
				return B_BAD_ADDRESS;
			}
		} else
			value = *(int*)buffer;

		size_t argument = descriptor->open_mode & ~O_NONBLOCK;
		argument |= (value ? O_NONBLOCK : 0);

		return (kernelFD ? _kern_fcntl : _user_fcntl)(fd, F_SETFL, argument);
	}

	status_t status;
	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor.Get(), op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	return status;
}
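
/*	Example (userland view): due to the FIONBIO special case above, these
	two calls have the same effect:

		int on = 1;
		ioctl(fd, FIONBIO, &on);
			// translated here into fcntl(fd, F_SETFL, mode | O_NONBLOCK)

		int flags = fcntl(fd, F_GETFL);
		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
*/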


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		select_info* next = info->next;
		notify_select_events(info, B_EVENT_INVALID);
		info = next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			notify_select_events(info, eventsToSelect);

		info->selected_events = 0;
		return B_UNSUPPORTED;
	}

	// We need the FD to stay open while we're doing this, so no select()/
	// deselect() will be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor.Get(), event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor.Get()) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor.Get(), info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor.Get());
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	acquire_select_sync(info->sync);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}
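
/*	Summary of the protocol above: for a select() on, say,
	B_EVENT_READ | B_EVENT_WRITE, select_fd()

		1. temporarily bumps open_count so a concurrent close() cannot
		   race the select hooks,
		2. calls ops->fd_select() once per requested event bit,
		3. re-checks that the slot still holds the same descriptor, and
		4. links "info" into context->select_infos[fd] so remove_fd() or
		   deselect_fd() can later undo the registration.
*/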


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor.Get(), event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}


static ssize_t
common_vector_io(int fd, off_t pos, const iovec* vecs, size_t count,
	bool write, bool kernel)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernel), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (!movePosition && count > 1 && (write ? descriptor->ops->fd_writev != NULL
			: descriptor->ops->fd_readv != NULL)) {
		ssize_t result;
		if (write) {
			result = descriptor->ops->fd_writev(descriptor.Get(), pos,
				vecs, count);
		} else {
			result = descriptor->ops->fd_readv(descriptor.Get(), pos,
				vecs, count);
		}
		if (result != B_UNSUPPORTED)
			return result;
		// If not supported, just fall back to the loop.
	}

	status_t status = B_OK;
	ssize_t bytesTransferred = 0;
	for (size_t i = 0; i < count; i++) {
		if (vecs[i].iov_base == NULL)
			continue;

		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		if (pos != -1)
			pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos;
	}

	return bytesTransferred;
}
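
/*	Example: the fallback loop gives readv()/writev() their partial-transfer
	semantics. With two 100-byte vectors ("bufferA" and "bufferB" are
	hypothetical), a short read stops the loop early:

		iovec vecs[2] = { { bufferA, 100 }, { bufferB, 100 } };
		ssize_t transferred = _kern_readv(fd, -1, vecs, 2);
		// if fd_read() returned only 60 bytes for the first vector,
		// transferred is 60, not an error
*/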


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (length == 0)
		return 0;

	if (!is_user_address_range(buffer, length))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor.Get(), pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor.Get(), pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos + length;
	}

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	BStackOrHeapArray<iovec, 16> vecs(count);
	if (!vecs.IsValid())
		return B_NO_MEMORY;

	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
	if (error != B_OK)
		return error;

	SyscallRestartWrapper<ssize_t> result;
	result = common_vector_io(fd, pos, vecs, count, write, false);

	return result;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor.Get()));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_ioctl: fd %d\n", fd));

	// "buffer" is not always a pointer depending on "op", so we cannot
	// check that it is a userland buffer here. Instead we check that
	// it is at least not within the bounds of kernel memory; in the
	// cases where it is a numeric constant, it is usually a low one.
	if (IS_KERNEL_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %" B_PRIuSIZE
		", count = %" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(),
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	ASSERT(count <= maxCount);

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	BytePointer<struct dirent> entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry += length;
	}

	ASSERT(sizeToCopy <= bufferSize);

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}
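
/*	Example (kernel view): the returned entries are variable-length, so a
	caller advances by d_reclen, mirroring the size computation above:

		char buffer[4096];
		ssize_t count = _kern_read_dir(fd, (struct dirent*)buffer,
			sizeof(buffer), 32);
		struct dirent* entry = (struct dirent*)buffer;
		for (ssize_t i = 0; i < count; i++) {
			// entry->d_name holds the entry name
			entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
		}
*/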


status_t
_user_rewind_dir(int fd)
{
	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor.Get(), pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor.Get(), pos,
		buffer, &length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	SyscallFlagUnsetter _;
	return common_vector_io(fd, pos, vecs, count, false, true);
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	SyscallFlagUnsetter _;
	return common_vector_io(fd, pos, vecs, count, true, true);
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %" B_PRIuSIZE
		", count = %" B_PRIu32 ")\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	ssize_t retval;
	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(), buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}