xref: /haiku/src/system/kernel/fs/fd.cpp (revision b8a45b3a2df2379b4301bf3bd5949b9a105be4ba)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include <OS.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <BytePointer.h>
#include <StackOrHeapArray.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <slab/Slab.h>
#include <util/AutoLock.h>
#include <util/iovec_support.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


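// Upper bound for the temporary kernel buffer that _user_read_dir()
// allocates when copying directory entries back to userland.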
static const size_t kMaxReadDirBufferSize = 64 * 1024;

extern object_cache* sFileDescriptorCache;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*! Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = -1;
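	// Note: "type" and "ops" are not initialized here; the caller is
	// expected to set them before the descriptor is used.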

	return descriptor;
}


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
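
// Example of the bitmap layout used by the two functions above: for
// fd = 10 the flag lives in fds_close_on_exec[10 / 8] == byte 1, at bit
// 10 & 7 == 2, i.e. it is set via fds_close_on_exec[1] |= 1 << 2.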


/*!	Searches for a free slot in the FD table of the provided I/O context, and
	inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
		return B_BAD_VALUE;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}


/*!	Reduces the descriptor's reference counter, and frees all resources
	when it's no longer used.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		object_cache_free(sFileDescriptorCache, descriptor, 0);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}
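
// Note on the two counters: "ref_count" tracks references to the
// file_descriptor structure itself (table slots, get_fd() callers),
// while "open_count" tracks how many users keep the underlying object
// open. close_fd() only calls the close hook once the last open
// reference is gone, and put_fd() frees the structure once the last
// reference is released.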


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (descriptor->type == FDTYPE_FILE)
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd() - get_fd() will no longer
	succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume got removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			return NULL;

		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	mutex_unlock(&context->io_mutex);
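	// A disconnected descriptor is reported as absent, so e.g.
	// close_fd_index() will return B_FILE_ERROR for it.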
	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; note that this check cannot be done above,
	// because we always want to return an error on invalid handles
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		select_info* selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;

		deselect_select_infos(evicted, selectInfos, true);
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	FileDescriptorPutter descriptorPutter(descriptor);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernelFD), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// Special case: translate FIONBIO into fcntl(F_SETFL).
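	// For example, ioctl(fd, FIONBIO, &value) with a nonzero value behaves
	// like fcntl(fd, F_SETFL, open mode | O_NONBLOCK); a zero value clears
	// O_NONBLOCK again.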
	if (op == FIONBIO) {
		if (buffer == NULL)
			return B_BAD_VALUE;

		int value;
		if (is_called_via_syscall()) {
			if (!IS_USER_ADDRESS(buffer)
				|| user_memcpy(&value, buffer, sizeof(int)) != B_OK) {
				return B_BAD_ADDRESS;
			}
		} else
			value = *(int*)buffer;

		size_t argument = descriptor->open_mode & ~O_NONBLOCK;
		argument |= (value ? O_NONBLOCK : 0);

		return (kernelFD ? _kern_fcntl : _user_fcntl)(fd, F_SETFL, argument);
	}

	status_t status;
	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor.Get(), op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	return status;
}


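/*!	Deselects all events of the given select_info list on the descriptor
	and notifies the waiters with B_EVENT_INVALID. If \a putSyncObjects is
	\c true, the references to the sync objects are released as well.
*/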
static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		select_info* next = info->next;
		notify_select_events(info, B_EVENT_INVALID);
		info = next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			return notify_select_events(info, eventsToSelect);
		else
			return B_OK;
	}

	// We need the FD to stay open while we're doing this, so no select()/
	// deselect() will be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor.Get(), event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor.Get()) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor.Get(), info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor.Get());
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	acquire_select_sync(info->sync);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor.Get(), event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}


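/*!	Returns the vnode associated with the given descriptor, or \c NULL for
	descriptor types that are not backed by a vnode.
*/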
struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


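/*!	Backend for the single-buffer read and write syscalls. A \a pos of -1
	means that the descriptor's current position is used (and advanced
	afterwards), if the descriptor has one.
*/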
static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (length == 0)
		return 0;

	if (!is_user_address_range(buffer, length))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor.Get(), pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor.Get(), pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos + length;
	}

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


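/*!	Backend for the vectored read and write syscalls. The vectors are
	transferred one at a time; if an error occurs after some bytes have
	already been transferred, the byte count is returned rather than the
	error.
*/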
static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	BStackOrHeapArray<iovec, 16> vecs(count);
	if (!vecs.IsValid())
		return B_NO_MEMORY;

	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
	if (error != B_OK)
		return error;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (size_t i = 0; i < count; i++) {
		if (vecs[i].iov_base == NULL)
			continue;

		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor.Get(), pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		if (pos != -1)
			pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos;
	}

	return bytesTransferred;
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor.Get()));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_ioctl: fd %d\n", fd));

	// "buffer" is not always a pointer depending on "op", so we cannot
	// check that it is a userland buffer here. Instead we check that
	// it is at least not within the bounds of kernel memory; as in
	// the cases where it is a numeric constant it is usually a low one.
	if (IS_KERNEL_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(),
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	ASSERT(count <= maxCount);

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	BytePointer<struct dirent> entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry += length;
	}

	ASSERT(sizeToCopy <= bufferSize);

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}


status_t
_user_rewind_dir(int fd)
{
	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls

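// The _kern_*() calls below mirror the user syscalls above, but operate
// on the kernel's I/O context and trust their buffers, so no userland
// address checks are performed. SyscallFlagUnsetter ensures that checks
// like is_called_via_syscall() (see fd_ioctl()) treat these as plain
// kernel calls even when reached from a syscall.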

ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor.Get(), pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	status_t status;

	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (size_t i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor.Get(), pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		if (pos != -1)
			pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor.Get(), pos,
		buffer, &length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	status_t status;

	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (size_t i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor.Get(), pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		if (pos != -1)
			pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	ssize_t retval;
	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(), buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}