xref: /haiku/src/system/kernel/fs/fd.cpp (revision 984f843b917a1c4e077915c5961a6ef1cf8dabc7)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include <OS.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <BytePointer.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <slab/Slab.h>
#include <util/AutoLock.h>
#include <util/iovec_support.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;

extern object_cache* sFileDescriptorCache;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*!	Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = -1;

	return descriptor;
}
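

/*!	A minimal usage sketch for alloc_fd() (illustrative only; the
	my_fd_ops table and the field values are hypothetical, not taken
	from this file):
	\code
		file_descriptor* descriptor = alloc_fd();
		if (descriptor == NULL)
			return B_NO_MEMORY;

		descriptor->type = FDTYPE_FILE;
		descriptor->ops = &my_fd_ops;	// hypothetical fd_ops table
		descriptor->open_mode = O_RDONLY;

		int fd = new_fd(get_current_io_context(false), descriptor);
		if (fd < 0)
			put_fd(descriptor);	// drop the initial reference from alloc_fd()
		return fd;
	\endcode
*/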


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
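

/*!	The close-on-exec flags are kept in a plain bitmap, one bit per FD:
	byte fd / 8, bit fd & 7. A worked example (illustrative only):
	\code
		// fd 10: byte 10 / 8 == 1, bit 10 & 7 == 2
		fd_set_close_on_exec(context, 10, true);
		// context->fds_close_on_exec[1] now has bit (1 << 2) set
		ASSERT(fd_close_on_exec(context, 10));
	\endcode
*/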


/*!	Searches for a free slot in the FD table of the provided I/O context,
	and inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
		return B_BAD_VALUE;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}
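

/*!	A sketch of how the firstIndex parameter supports fcntl(F_DUPFD)-style
	semantics (an assumed caller, for illustration): the returned slot is
	the lowest free index at or above the requested minimum.
	\code
		int fd = new_fd_etc(context, descriptor, minIndex);
		if (fd < 0)
			return fd;	// B_BAD_VALUE or B_NO_MORE_FDS
	\endcode
*/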


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}


/*!	Decrements the descriptor's reference count, and frees all resources
	when it is no longer used.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		object_cache_free(sFileDescriptorCache, descriptor, 0);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// The descriptor has been disconnected; it cannot be accessed
		// anymore, so let's close it (no one is currently accessing
		// this descriptor).

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}
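

/*!	Reference counting sketch (illustrative): every successful get_fd()
	must be balanced by a put_fd(); the last put_fd() returns the
	descriptor to the object cache.
	\code
		file_descriptor* descriptor = get_fd(context, fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;
		// ... use the descriptor ...
		put_fd(descriptor);	// releases the reference from get_fd()
	\endcode
	FileDescriptorPutter, used throughout this file, wraps exactly this
	pattern in a RAII holder.
*/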


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (descriptor->type == FDTYPE_FILE)
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}
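

/*!	The split between open_count and ref_count is easiest to see with
	dup() (a worked sketch, assuming no other references are held):
	\code
		int fd = open(...);	// one slot: open_count = 1, ref_count = 1
		int fd2 = dup(fd);	// two slots: open_count = 2, ref_count = 2
		close(fd);			// open_count = 1: fd_close is not called yet
		close(fd2);			// open_count reaches 0: ops->fd_close runs,
							// and the final put_fd() runs ops->fd_free
	\endcode
*/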


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd(); get_fd() will no longer
	succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume has been removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}
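

/*!	A sketch of the disconnect semantics (illustrative): after
	disconnect_fd(), get_fd() fails for this descriptor, while the table
	slot itself stays occupied until it is explicitly closed.
	\code
		disconnect_fd(descriptor);
		ASSERT(get_fd(context, fd) == NULL);	// lookups now fail
		// the slot still counts as used until close() is invoked on it
	\endcode
*/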


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			return NULL;

		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	mutex_unlock(&context->io_mutex);

	return disconnected ? NULL : descriptor;
}
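

/*!	Caller-side sketch (illustrative): remove_fd() hands the slot's
	reference over to the caller, which is then responsible for
	close_fd() and put_fd(); a disconnected descriptor is reported as
	NULL, as if the slot were empty.
	\code
		file_descriptor* descriptor = remove_fd(context, fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;	// empty slot or disconnected descriptor
		close_fd(context, descriptor);
		put_fd(descriptor);			// the reference owned by the slot
	\endcode
*/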


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (the mutex must be locked, because
	// the table size could change)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity. Note that this check cannot be done above,
	// because we always want to return an error for invalid handles.
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		select_info* selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;

		deselect_select_infos(evicted, selectInfos, true);
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}
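

/*!	Behavioral sketch (illustrative): unlike the naive close()/fcntl()
	sequence quoted above, the whole dup2_fd() runs under the context's
	io_mutex, so no other thread can claim newfd in between.
	\code
		int result = dup2_fd(oldfd, newfd, false);
		if (result < 0)
			return result;	// B_FILE_ERROR for invalid fds
		// result == newfd; note that even for oldfd == newfd the
		// close-on-exec flag of newfd is cleared
	\endcode
*/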


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			otherwise in the current userland team.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	FileDescriptorPutter descriptorPutter(descriptor);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference now belongs to the slot
		descriptorPutter.Detach();
	}

	return result;
}
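

/*!	Usage sketch (hypothetical values, for illustration): pulling an FD
	out of another team into the kernel's I/O context.
	\code
		int localFD = dup_foreign_fd(otherTeamID, theirFD, true);
		if (localFD < 0)
			return localFD;	// B_BAD_TEAM_ID or B_FILE_ERROR
		// ... use localFD in the kernel team ...
		_kern_close(localFD);
	\endcode
*/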


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernelFD), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// Special case: translate FIONBIO into fcntl(F_SETFL).
	if (op == FIONBIO) {
		if (buffer == NULL)
			return B_BAD_VALUE;

		int value;
		if (is_called_via_syscall()) {
			if (!IS_USER_ADDRESS(buffer)
				|| user_memcpy(&value, buffer, sizeof(int)) != B_OK) {
				return B_BAD_ADDRESS;
			}
		} else
			value = *(int*)buffer;

		size_t argument = descriptor->open_mode & ~O_NONBLOCK;
		argument |= (value ? O_NONBLOCK : 0);

		return (kernelFD ? _kern_fcntl : _user_fcntl)(fd, F_SETFL, argument);
	}

	status_t status;
	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor.Get(), op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	return status;
}
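

/*!	The FIONBIO special case above makes the classic BSD idiom equivalent
	to an fcntl() call (a sketch of the userland view; the fd is assumed
	to be any open descriptor):
	\code
		int on = 1;
		ioctl(fd, FIONBIO, &on, sizeof(on));
		// has the same effect as:
		fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
	\endcode
*/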


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		select_info* next = info->next;
		notify_select_events(info, B_EVENT_INVALID);
		info = next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// If the I/O subsystem doesn't support select(), we
		// immediately notify the select call.
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			return notify_select_events(info, eventsToSelect);
		else
			return B_OK;
	}

	// We need the FD to stay open while we're doing this, so that
	// select()/deselect() won't be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor.Get(), event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected,
	// we always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor.Get()) {
		// Someone close()d the index in the meantime; deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor.Get(), info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor.Get());
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	acquire_select_sync(info->sync);

	// Finally, release our open reference. It is safe to just decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}
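

/*!	A sketch of the contract with the fd_select hook (illustrative;
	my_select is a hypothetical implementation, and the exact hook
	signature is an assumption): for every event bit that fd_select()
	accepts, a matching fd_deselect() call follows later, from
	deselect_fd() below or from deselect_select_infos().
	\code
		static status_t
		my_select(file_descriptor* descriptor, uint8 event, selectsync* sync)
		{
			// remember (event, sync); once the event becomes ready, the
			// implementation is expected to call
			// notify_select_event(sync, event) to wake the waiting select()
			return B_OK;
		}
	\endcode
*/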


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor.Get(), event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked,
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}
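

/*!	Usage sketch (illustrative): the result is only a snapshot, so treat
	it as a hint, never as a guarantee that a later operation on the fd
	will succeed.
	\code
		if (!fd_is_valid(fd, false))
			return B_FILE_ERROR;	// definitely invalid right now
		// the fd may still be closed by another thread before it is used
	\endcode
*/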


struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (length == 0)
		return 0;

	if (!is_user_address_range(buffer, length))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor.Get(), pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor.Get(), pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos + length;
	}

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	iovec* vecs;
	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
	if (error != B_OK)
		return error;
	MemoryDeleter _(vecs);

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (size_t i = 0; i < count; i++) {
		if (vecs[i].iov_base == NULL)
			continue;

		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor.Get(), pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos;
	}

	return bytesTransferred;
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_ioctl: fd %d\n", fd));

	// "buffer" is not always a pointer, depending on "op", so we cannot
	// check that it is a userland buffer here. Instead we check that
	// it is at least not within the bounds of kernel memory; in the
	// cases where it is a numeric constant, it is usually a low one.
	if (IS_KERNEL_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(),
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	ASSERT(count <= maxCount);

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	BytePointer<struct dirent> entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry += length;
	}

	ASSERT(sizeToCopy <= bufferSize);

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}
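

/*!	The d_reclen walk above mirrors what a reader of the returned buffer
	does (a sketch; the buffer size and entry count are assumptions):
	\code
		char buffer[4096];
		ssize_t count = _kern_read_dir(fd, (struct dirent*)buffer,
			sizeof(buffer), 32);
		struct dirent* entry = (struct dirent*)buffer;
		for (ssize_t i = 0; i < count; i++) {
			// entries are packed back to back; d_reclen is the offset to
			// the next entry
			dprintf("%s\n", entry->d_name);
			entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
		}
	\endcode
*/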


status_t
_user_rewind_dir(int fd)
{
	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor.Get(), pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	status_t status;

	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (size_t i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor.Get(), pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor.Get(), pos,
		buffer, &length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	status_t status;

	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (size_t i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor.Get(), pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	ssize_t retval;
	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(), buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}