xref: /haiku/src/system/kernel/fs/fd.cpp (revision 344ded80d400028c8f561b4b876257b94c12db4a)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include <OS.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <BytePointer.h>
#include <StackOrHeapArray.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <slab/Slab.h>
#include <util/AutoLock.h>
#include <util/iovec_support.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = B_PAGE_SIZE * 2;

extern object_cache* sFileDescriptorCache;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*! Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = -1;

	return descriptor;
}


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
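

/*	Illustrative sketch (not part of the original file): the close-on-exec
	flags live in a plain bitmap, one bit per FD, so fd / 8 selects the byte
	and fd & 7 the bit within it. A standalone userland model of the same
	arithmetic, with hypothetical names:

		#include <stdbool.h>
		#include <stdint.h>
		#include <stdio.h>

		static uint8_t closeOnExecBits[256 / 8];

		static void
		set_close_on_exec_bit(int fd, bool closeFD)
		{
			if (closeFD)
				closeOnExecBits[fd / 8] |= 1 << (fd & 7);
			else
				closeOnExecBits[fd / 8] &= ~(1 << (fd & 7));
		}

		int
		main(void)
		{
			set_close_on_exec_bit(10, true);
			printf("%d\n", (closeOnExecBits[1] >> 2) & 1);
				// prints 1: fd 10 lives in byte 1, bit 2
			return 0;
		}
*/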


/*!	Searches for a free slot in the FD table of the provided I/O context, and
	inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
		return B_BAD_VALUE;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}
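

/*	Illustrative sketch (not part of the original file): new_fd_etc() hands
	out the lowest free slot at or above firstIndex. This is what implements
	the POSIX "lowest available descriptor" rule, and what fcntl(F_DUPFD, n)
	relies on to skip the slots below n. Observed from userland, assuming
	/bin/sh exists:

		#include <fcntl.h>
		#include <unistd.h>

		int fd = open("/bin/sh", O_RDONLY);
		int lowest = dup(fd);
			// lowest free slot, i.e. firstIndex == 0
		int atLeastTen = fcntl(fd, F_DUPFD, 10);
			// lowest free slot >= 10, i.e. firstIndex == 10
		close(atLeastTen);
		close(lowest);
		close(fd);
*/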


/*!	Reduces the descriptor's reference counter, and frees all resources
	when it's no longer used.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		object_cache_free(sFileDescriptorCache, descriptor, 0);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}
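

/*	Illustrative sketch (not part of the original file): every descriptor
	carries two counters. ref_count counts all pointer holders (table slots
	plus temporary get_fd() users), while open_count counts table slots only;
	put_fd() frees on the last reference, and the O_DISCONNECTED branch above
	closes the underlying object as soon as only table slots remain. A
	minimal standalone model of the drop-to-zero pattern, with hypothetical
	names:

		#include <atomic>

		struct refcounted_object {
			std::atomic<int> ref_count;
		};

		void
		put_object(refcounted_object* object)
		{
			// fetch_sub() returns the previous value, like atomic_add()
			// above, so 1 means this call dropped the last reference.
			if (object->ref_count.fetch_sub(1) == 1)
				delete object;
		}
*/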


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (fd_is_file(descriptor))
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd() - get_fd() will no longer
	succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume is removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			return NULL;

		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	mutex_unlock(&context->io_mutex);

	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, int flags, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;
	if ((flags & ~O_CLOEXEC) != 0)
		return B_BAD_VALUE;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity. Note that this check cannot be done earlier,
	// because we always want to return an error for invalid
	// handles.
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		select_info* selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;

		deselect_select_infos(evicted, selectInfos, true);
	}

	fd_set_close_on_exec(context, newfd, (flags & O_CLOEXEC) != 0);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}
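

/*	Illustrative sketch (not part of the original file): the net effect of
	dup2_fd() as seen from userland, here redirecting standard output into a
	file (hypothetical path):

		#include <fcntl.h>
		#include <stdio.h>
		#include <unistd.h>

		int fd = open("/tmp/out.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
		if (fd >= 0 && fd != STDOUT_FILENO) {
			dup2(fd, STDOUT_FILENO);
				// atomically evicts fd 1 and installs a duplicate of fd
			close(fd);
		}
		printf("this now ends up in /tmp/out.log\n");

	Doing close(1) followed by fcntl(fd, F_DUPFD, 1) instead would leave a
	window in which another thread could claim slot 1; that is why the
	comment above calls the one-step version thread-safe.
*/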


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	FileDescriptorPutter descriptorPutter(descriptor);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernelFD), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// Special case: translate FIONBIO into fcntl(F_SETFL).
	if (op == FIONBIO) {
		if (buffer == NULL)
			return B_BAD_VALUE;

		int value;
		if (is_called_via_syscall()) {
			if (!IS_USER_ADDRESS(buffer)
				|| user_memcpy(&value, buffer, sizeof(int)) != B_OK) {
				return B_BAD_ADDRESS;
			}
		} else
			value = *(int*)buffer;

		size_t argument = descriptor->open_mode & ~O_NONBLOCK;
		argument |= (value ? O_NONBLOCK : 0);

		return (kernelFD ? _kern_fcntl : _user_fcntl)(fd, F_SETFL, argument);
	}

	status_t status;
	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor.Get(), op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	return status;
}
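

/*	Illustrative sketch (not part of the original file): because of the
	FIONBIO special case above, the two userland idioms below are
	equivalent; both end up in fcntl(F_SETFL) toggling O_NONBLOCK (assuming
	fd is an open descriptor):

		#include <fcntl.h>
		#include <sys/ioctl.h>

		int on = 1;
		ioctl(fd, FIONBIO, &on);

		int flags = fcntl(fd, F_GETFL, 0);
		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
*/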


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		select_info* next = info->next;
		notify_select_events(info, B_EVENT_INVALID);
		info = next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			notify_select_events(info, eventsToSelect);

		info->selected_events = 0;
		return B_UNSUPPORTED;
	}

	// We need the FD to stay open while we're doing this, so no select()/
	// deselect() will be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor.Get(), event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor.Get()) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor.Get(), info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor.Get());
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	acquire_select_sync(info->sync);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}
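

/*	Illustrative sketch (not part of the original file): select_fd() is the
	kernel half of a userland wait; the wait machinery calls it once per
	descriptor and event before blocking. The corresponding POSIX entry
	point, assuming fd is an open descriptor:

		#include <sys/select.h>

		fd_set readSet;
		FD_ZERO(&readSet);
		FD_SET(fd, &readSet);

		struct timeval timeout = { 5, 0 };
		int count = select(fd + 1, &readSet, NULL, NULL, &timeout);
			// > 0: fd is readable; 0: timed out; < 0: error
*/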


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor.Get(), event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}


static ssize_t
common_vector_io(int fd, off_t pos, const iovec* vecs, size_t count, bool write, bool kernel)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernel), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (!movePosition && count > 1 && (write ? descriptor->ops->fd_writev != NULL
			: descriptor->ops->fd_readv != NULL)) {
		ssize_t result;
		if (write) {
			result = descriptor->ops->fd_writev(descriptor.Get(), pos,
				vecs, count);
		} else {
			result = descriptor->ops->fd_readv(descriptor.Get(), pos,
				vecs, count);
		}
		if (result != B_UNSUPPORTED)
			return result;
		// If not supported, just fall back to the loop.
	}

	status_t status = B_OK;
	ssize_t bytesTransferred = 0;
	for (size_t i = 0; i < count; i++) {
		if (vecs[i].iov_base == NULL)
			continue;

		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		if (pos != -1)
			pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos;
	}

	return bytesTransferred;
}
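

/*	Illustrative sketch (not part of the original file): the loop above is
	the fallback that makes vectored I/O work even for descriptors whose ops
	only provide single-buffer fd_read()/fd_write(). From userland, with
	hypothetical buffers (assuming fd is an open descriptor):

		#include <sys/uio.h>

		char header[16];
		char body[4096];
		struct iovec vecs[2] = {
			{ header, sizeof(header) },
			{ body, sizeof(body) }
		};
		ssize_t total = readv(fd, vecs, 2);
			// fills header first, then body; returns the total
			// number of bytes read, or -1 on error
*/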


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (length == 0)
		return 0;

	if (!is_user_address_range(buffer, length))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor.Get(), pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor.Get(), pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos + length;
	}

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}
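

/*	Illustrative sketch (not part of the original file): pos == -1 means
	"use and advance the descriptor's current position", which is how
	read()/write() and pread()/pwrite() share this code path (assuming fd is
	an open descriptor):

		#include <unistd.h>

		char buffer[128];
		read(fd, buffer, sizeof(buffer));
			// pos == -1: reads at and advances fd's offset
		pread(fd, buffer, sizeof(buffer), 0);
			// explicit pos: reads at offset 0, fd's offset untouched
*/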


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	BStackOrHeapArray<iovec, 16> vecs(count);
	if (!vecs.IsValid())
		return B_NO_MEMORY;

	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
	if (error != B_OK)
		return error;

	SyscallRestartWrapper<ssize_t> result;
	result = common_vector_io(fd, pos, vecs, count, write, false);

	return result;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}
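

/*	Illustrative sketch (not part of the original file): seekType maps to
	the classic lseek() whence values; descriptors without an fd_seek hook
	(pipes, sockets) yield ESPIPE, as POSIX specifies. A common userland use,
	assuming fd is a seekable descriptor:

		#include <unistd.h>

		off_t size = lseek(fd, 0, SEEK_END);
			// the file size, since the end offset is SEEK_END plus 0
		lseek(fd, 0, SEEK_SET);
			// rewind to the start
*/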


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_ioctl: fd %d\n", fd));

	// Depending on "op", "buffer" is not always a pointer, so we cannot
	// check that it is a userland buffer here. Instead we check that
	// it is at least not within the bounds of kernel memory; in the
	// cases where it is a numeric constant, it is usually a low one.
	if (IS_KERNEL_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(),
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	ASSERT(count <= maxCount);

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	BytePointer<struct dirent> entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry += length;
	}

	ASSERT(sizeToCopy <= bufferSize);

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}
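

/*	Illustrative sketch (not part of the original file): the entries in the
	returned buffer are packed back to back, each d_reclen bytes long, so
	consumers walk the buffer the same way the size computation above does.
	Assuming "buffer" holds "count" packed dirents, as returned above:

		struct dirent* entry = buffer;
		for (uint32 i = 0; i < count; i++) {
			// entry->d_name is valid here
			entry = (struct dirent*)((char*)entry + entry->d_reclen);
		}
*/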


status_t
_user_rewind_dir(int fd)
{
	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd, int flags)
{
	return dup2_fd(ofd, nfd, flags, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor.Get(), pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor.Get(), pos,
		buffer, &length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	SyscallFlagUnsetter _;
	return common_vector_io(fd, pos, vecs, count, false, true);
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	SyscallFlagUnsetter _;
	return common_vector_io(fd, pos, vecs, count, true, true);
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	ssize_t retval;
	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(), buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd, int flags)
{
	return dup2_fd(ofd, nfd, flags, true);
}