xref: /haiku/src/system/kernel/fs/fd.cpp (revision 372b901dfeada686207d00bbcce456f748bbda12)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 #include <AutoDeleterDrivers.h>
20 #include <BytePointer.h>
21 
22 #include <syscalls.h>
23 #include <syscall_restart.h>
24 #include <slab/Slab.h>
25 #include <util/AutoLock.h>
26 #include <vfs.h>
27 #include <wait_for_objects.h>
28 
29 #include "vfs_tracing.h"
30 
31 
32 //#define TRACE_FD
33 #ifdef TRACE_FD
34 #	define TRACE(x) dprintf x
35 #else
36 #	define TRACE(x)
37 #endif
38 
39 
40 static const size_t kMaxReadDirBufferSize = 64 * 1024;
41 
42 extern object_cache* sFileDescriptorCache;
43 
44 
45 static struct file_descriptor* get_fd_locked(struct io_context* context,
46 	int fd);
47 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
48 static void deselect_select_infos(file_descriptor* descriptor,
49 	select_info* infos, bool putSyncObjects);
50 
51 
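// Note: FDGetter "locks" a file_descriptor by holding a reference to it;
// Unlock() simply puts that reference again. This lets AutoLocker manage
// get_fd()/put_fd() pairs automatically.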
52 struct FDGetterLocking {
53 	inline bool Lock(file_descriptor* /*lockable*/)
54 	{
55 		return false;
56 	}
57 
58 	inline void Unlock(file_descriptor* lockable)
59 	{
60 		put_fd(lockable);
61 	}
62 };
63 
64 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
65 public:
66 	inline FDGetter()
67 		: AutoLocker<file_descriptor, FDGetterLocking>()
68 	{
69 	}
70 
71 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
72 		: AutoLocker<file_descriptor, FDGetterLocking>(
73 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
74 	{
75 	}
76 
77 	inline file_descriptor* SetTo(io_context* context, int fd,
78 		bool contextLocked = false)
79 	{
80 		file_descriptor* descriptor
81 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
82 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
83 		return descriptor;
84 	}
85 
86 	inline file_descriptor* SetTo(int fd, bool kernel,
87 		bool contextLocked = false)
88 	{
89 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
90 	}
91 
92 	inline file_descriptor* FD() const
93 	{
94 		return fLockable;
95 	}
96 };
97 
98 
99 //	#pragma mark - General fd routines
100 
101 
102 #ifdef DEBUG
103 void dump_fd(int fd, struct file_descriptor* descriptor);
104 
105 void
106 dump_fd(int fd, struct file_descriptor* descriptor)
107 {
108 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
109 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
110 		", pos = %" B_PRId64 "\n",
111 		fd, descriptor, descriptor->type, descriptor->ref_count,
112 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
113 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
114 }
115 #endif
116 
117 
118 /*! Allocates and initializes a new file_descriptor.
119 */
120 struct file_descriptor*
121 alloc_fd(void)
122 {
123 	file_descriptor* descriptor
124 		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
125 	if (descriptor == NULL)
126 		return NULL;
127 
128 	descriptor->u.vnode = NULL;
129 	descriptor->cookie = NULL;
130 	descriptor->ref_count = 1;
131 	descriptor->open_count = 0;
132 	descriptor->open_mode = 0;
133 	descriptor->pos = 0;
134 
135 	return descriptor;
136 }
137 
138 
139 bool
140 fd_close_on_exec(struct io_context* context, int fd)
141 {
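	// the close-on-exec flags are kept in a bitmap, one bit per descriptor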
142 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
143 }
144 
145 
146 void
147 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
148 {
149 	if (closeFD)
150 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
151 	else
152 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
153 }
154 
155 
156 /*!	Searches for a free slot in the FD table of the provided I/O context, and
157 	inserts the specified descriptor into it.
158 */
159 int
160 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
161 	int firstIndex)
162 {
163 	int fd = -1;
164 	uint32 i;
165 
166 	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
167 		return B_BAD_VALUE;
168 
169 	mutex_lock(&context->io_mutex);
170 
171 	for (i = firstIndex; i < context->table_size; i++) {
172 		if (!context->fds[i]) {
173 			fd = i;
174 			break;
175 		}
176 	}
177 	if (fd < 0) {
178 		fd = B_NO_MORE_FDS;
179 		goto err;
180 	}
181 
182 	TFD(NewFD(context, fd, descriptor));
183 
184 	context->fds[fd] = descriptor;
185 	context->num_used_fds++;
186 	atomic_add(&descriptor->open_count, 1);
187 
188 err:
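	// reached on success as well; fd holds either the new slot index or
	// B_NO_MORE_FDS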
189 	mutex_unlock(&context->io_mutex);
190 
191 	return fd;
192 }
193 
194 
195 int
196 new_fd(struct io_context* context, struct file_descriptor* descriptor)
197 {
198 	return new_fd_etc(context, descriptor, 0);
199 }
200 
201 
202 /*!	Decrements the descriptor's reference count, and frees all of its
203 	resources when it is no longer used.
204 */
205 void
206 put_fd(struct file_descriptor* descriptor)
207 {
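	// atomic_add() returns the previous value, so previous == 1 means that
	// this call released the last reference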
208 	int32 previous = atomic_add(&descriptor->ref_count, -1);
209 
210 	TFD(PutFD(descriptor));
211 
212 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
213 		descriptor, descriptor->ref_count, descriptor->cookie));
214 
215 	// free the descriptor if we don't need it anymore
216 	if (previous == 1) {
217 		// free the underlying object
218 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
219 			descriptor->ops->fd_free(descriptor);
220 
221 		object_cache_free(sFileDescriptorCache, descriptor, 0);
222 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
223 		&& previous - 1 == descriptor->open_count
224 		&& descriptor->ops != NULL) {
225 		// the descriptor has been disconnected - it cannot
226 		// be accessed anymore, let's close it (no one is
227 		// currently accessing this descriptor)
228 
229 		if (descriptor->ops->fd_close)
230 			descriptor->ops->fd_close(descriptor);
231 		if (descriptor->ops->fd_free)
232 			descriptor->ops->fd_free(descriptor);
233 
234 		// prevent this descriptor from being closed/freed again
235 		descriptor->ops = NULL;
236 		descriptor->u.vnode = NULL;
237 
238 		// the file descriptor is kept intact, so that it's not
239 		// reused until someone explicitly closes it
240 	}
241 }
242 
243 
244 /*!	Decrements the open counter of the file descriptor and invokes
245 	its close hook when appropriate.
246 */
247 void
248 close_fd(struct io_context* context, struct file_descriptor* descriptor)
249 {
250 	// POSIX advisory locks need to be released when any file descriptor closes
251 	if (descriptor->type == FDTYPE_FILE)
252 		vfs_release_posix_lock(context, descriptor);
253 
254 	if (atomic_add(&descriptor->open_count, -1) == 1) {
255 		vfs_unlock_vnode_if_locked(descriptor);
256 
257 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
258 			descriptor->ops->fd_close(descriptor);
259 	}
260 }
261 
262 
263 status_t
264 close_fd_index(struct io_context* context, int fd)
265 {
266 	struct file_descriptor* descriptor = remove_fd(context, fd);
267 
268 	if (descriptor == NULL)
269 		return B_FILE_ERROR;
270 
271 	close_fd(context, descriptor);
272 	put_fd(descriptor);
273 		// the reference associated with the slot
274 
275 	return B_OK;
276 }
277 
278 
279 /*!	This descriptor's underlying object will be closed and freed as soon as
280 	possible (in one of the next calls to put_fd() - get_fd() will no longer
281 	succeed on this descriptor).
282 	This is useful if the underlying object is gone, for instance when a
283 	(mounted) volume has been removed unexpectedly.
284 */
285 void
286 disconnect_fd(struct file_descriptor* descriptor)
287 {
288 	descriptor->open_mode |= O_DISCONNECTED;
289 }
290 
291 
292 void
293 inc_fd_ref_count(struct file_descriptor* descriptor)
294 {
295 	atomic_add(&descriptor->ref_count, 1);
296 }
297 
298 
299 static struct file_descriptor*
300 get_fd_locked(struct io_context* context, int fd)
301 {
302 	if (fd < 0 || (uint32)fd >= context->table_size)
303 		return NULL;
304 
305 	struct file_descriptor* descriptor = context->fds[fd];
306 
307 	if (descriptor != NULL) {
308 		// disconnected descriptors cannot be accessed anymore
309 		if (descriptor->open_mode & O_DISCONNECTED)
310 			return NULL;
311 
312 		TFD(GetFD(context, fd, descriptor));
313 		inc_fd_ref_count(descriptor);
314 	}
315 
316 	return descriptor;
317 }
318 
319 
320 struct file_descriptor*
321 get_fd(struct io_context* context, int fd)
322 {
323 	MutexLocker _(context->io_mutex);
324 
325 	return get_fd_locked(context, fd);
326 }
327 
328 
329 struct file_descriptor*
330 get_open_fd(struct io_context* context, int fd)
331 {
332 	MutexLocker _(context->io_mutex);
333 
334 	file_descriptor* descriptor = get_fd_locked(context, fd);
335 	if (descriptor == NULL)
336 		return NULL;
337 
338 	atomic_add(&descriptor->open_count, 1);
339 
340 	return descriptor;
341 }
342 
343 
344 /*!	Removes the file descriptor from the specified slot.
345 */
346 static struct file_descriptor*
347 remove_fd(struct io_context* context, int fd)
348 {
349 	struct file_descriptor* descriptor = NULL;
350 
351 	if (fd < 0)
352 		return NULL;
353 
354 	mutex_lock(&context->io_mutex);
355 
356 	if ((uint32)fd < context->table_size)
357 		descriptor = context->fds[fd];
358 
359 	select_info* selectInfos = NULL;
360 	bool disconnected = false;
361 
362 	if (descriptor != NULL)	{
363 		// fd is valid
364 		TFD(RemoveFD(context, fd, descriptor));
365 
366 		context->fds[fd] = NULL;
367 		fd_set_close_on_exec(context, fd, false);
368 		context->num_used_fds--;
369 
370 		selectInfos = context->select_infos[fd];
371 		context->select_infos[fd] = NULL;
372 
373 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
374 	}
375 
376 	mutex_unlock(&context->io_mutex);
377 
378 	if (selectInfos != NULL)
379 		deselect_select_infos(descriptor, selectInfos, true);
380 
381 	return disconnected ? NULL : descriptor;
382 }
383 
384 
385 static int
386 dup_fd(int fd, bool kernel)
387 {
388 	struct io_context* context = get_current_io_context(kernel);
389 	struct file_descriptor* descriptor;
390 	int status;
391 
392 	TRACE(("dup_fd: fd = %d\n", fd));
393 
394 	// Try to get the fd structure
395 	descriptor = get_fd(context, fd);
396 	if (descriptor == NULL)
397 		return B_FILE_ERROR;
398 
399 	// now put the fd in place
400 	status = new_fd(context, descriptor);
401 	if (status < 0)
402 		put_fd(descriptor);
403 	else {
404 		mutex_lock(&context->io_mutex);
405 		fd_set_close_on_exec(context, status, false);
406 		mutex_unlock(&context->io_mutex);
407 	}
408 
409 	return status;
410 }
411 
412 
413 /*!	POSIX says this should be the same as:
414 		close(newfd);
415 		fcntl(oldfd, F_DUPFD, newfd);
416 
417 	We implement dup2() directly instead, so that it is thread-safe.
418 */
419 static int
420 dup2_fd(int oldfd, int newfd, bool kernel)
421 {
422 	struct file_descriptor* evicted = NULL;
423 	struct io_context* context;
424 
425 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
426 
427 	// quick check
428 	if (oldfd < 0 || newfd < 0)
429 		return B_FILE_ERROR;
430 
431 	// Get current I/O context and lock it
432 	context = get_current_io_context(kernel);
433 	mutex_lock(&context->io_mutex);
434 
435 	// Check if the fds are valid (mutex must be locked because
436 	// the table size could be changed)
437 	if ((uint32)oldfd >= context->table_size
438 		|| (uint32)newfd >= context->table_size
439 		|| context->fds[oldfd] == NULL
440 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
441 		mutex_unlock(&context->io_mutex);
442 		return B_FILE_ERROR;
443 	}
444 
445 	// Check for identity. Note that this check cannot be done earlier,
446 	// because we always want to return an error for invalid
447 	// descriptors.
448 	select_info* selectInfos = NULL;
449 	if (oldfd != newfd) {
450 		// Now do the work
451 		TFD(Dup2FD(context, oldfd, newfd));
452 
453 		evicted = context->fds[newfd];
454 		selectInfos = context->select_infos[newfd];
455 		context->select_infos[newfd] = NULL;
456 		atomic_add(&context->fds[oldfd]->ref_count, 1);
457 		atomic_add(&context->fds[oldfd]->open_count, 1);
458 		context->fds[newfd] = context->fds[oldfd];
459 
460 		if (evicted == NULL)
461 			context->num_used_fds++;
462 	}
463 
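	// the duplicate does not inherit the close-on-exec flag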
464 	fd_set_close_on_exec(context, newfd, false);
465 
466 	mutex_unlock(&context->io_mutex);
467 
468 	// Say bye bye to the evicted fd
469 	if (evicted) {
470 		deselect_select_infos(evicted, selectInfos, true);
471 		close_fd(context, evicted);
472 		put_fd(evicted);
473 	}
474 
475 	return newfd;
476 }
477 
478 
479 /*!	Duplicates an FD from another team to this/the kernel team.
480 	\param fromTeam The team which owns the FD.
481 	\param fd The FD to duplicate.
482 	\param kernel If \c true, the new FD will be created in the kernel team;
483 			otherwise, in the current userland team.
484 	\return The newly created FD or an error code, if something went wrong.
485 */
486 int
487 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
488 {
489 	// get the I/O context for the team in question
490 	Team* team = Team::Get(fromTeam);
491 	if (team == NULL)
492 		return B_BAD_TEAM_ID;
493 	BReference<Team> teamReference(team, true);
494 
495 	io_context* fromContext = team->io_context;
496 
497 	// get the file descriptor
498 	file_descriptor* descriptor = get_fd(fromContext, fd);
499 	if (descriptor == NULL)
500 		return B_FILE_ERROR;
501 	DescriptorPutter descriptorPutter(descriptor);
502 
503 	// create a new FD in the target I/O context
504 	int result = new_fd(get_current_io_context(kernel), descriptor);
505 	if (result >= 0) {
506 		// the descriptor reference belongs to the slot, now
507 		descriptorPutter.Detach();
508 	}
509 
510 	return result;
511 }
512 
513 
514 static status_t
515 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
516 {
517 	struct file_descriptor* descriptor;
518 	int status;
519 
520 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
521 	if (descriptor == NULL)
522 		return B_FILE_ERROR;
523 
524 	if (descriptor->ops->fd_ioctl)
525 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
526 	else
527 		status = B_DEV_INVALID_IOCTL;
528 
529 	if (status == B_DEV_INVALID_IOCTL)
530 		status = ENOTTY;
531 
532 	put_fd(descriptor);
533 	return status;
534 }
535 
536 
537 static void
538 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
539 	bool putSyncObjects)
540 {
541 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
542 
543 	select_info* info = infos;
544 	while (info != NULL) {
545 		select_sync* sync = info->sync;
546 
547 		// deselect the selected events
548 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
549 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
550 			for (uint16 event = 1; event < 16; event++) {
551 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
552 					descriptor->ops->fd_deselect(descriptor, event,
553 						(selectsync*)info);
554 				}
555 			}
556 		}
557 
558 		notify_select_events(info, B_EVENT_INVALID);
559 		info = info->next;
560 
561 		if (putSyncObjects)
562 			put_select_sync(sync);
563 	}
564 }
565 
566 
567 status_t
568 select_fd(int32 fd, struct select_info* info, bool kernel)
569 {
570 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
571 		info->sync, info->selected_events));
572 
573 	FDGetter fdGetter;
574 		// define before the context locker, so it will be destroyed after it
575 
576 	io_context* context = get_current_io_context(kernel);
577 	MutexLocker locker(context->io_mutex);
578 
579 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
580 	if (descriptor == NULL)
581 		return B_FILE_ERROR;
582 
583 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
584 
585 	if (descriptor->ops->fd_select == NULL) {
586 		// if the I/O subsystem doesn't support select(), we will
587 		// immediately notify the select call
588 		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
589 		if (eventsToSelect != 0)
590 			return notify_select_events(info, eventsToSelect);
591 		else
592 			return B_OK;
593 	}
594 
595 	// We need the FD to stay open while we're doing this, so that no
596 	// select()/deselect() will be called on it after it has been closed.
597 	atomic_add(&descriptor->open_count, 1);
598 
599 	locker.Unlock();
600 
601 	// select any events asked for
602 	uint32 selectedEvents = 0;
603 
604 	for (uint16 event = 1; event < 16; event++) {
605 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
606 			&& descriptor->ops->fd_select(descriptor, event,
607 				(selectsync*)info) == B_OK) {
608 			selectedEvents |= SELECT_FLAG(event);
609 		}
610 	}
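	// keep only the events that were actually selected; B_EVENT_INVALID is
	// always supported implicitly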
611 	info->selected_events = selectedEvents
612 		| (info->selected_events & B_EVENT_INVALID);
613 
614 	// Add the info to the I/O context. Even if nothing has been selected, we
615 	// always support B_EVENT_INVALID.
616 	locker.Lock();
617 	if (context->fds[fd] != descriptor) {
618 		// Someone close()d the index in the meantime. deselect() all
619 		// events.
620 		info->next = NULL;
621 		deselect_select_infos(descriptor, info, false);
622 
623 		// Release our open reference of the descriptor.
624 		close_fd(context, descriptor);
625 		return B_FILE_ERROR;
626 	}
627 
628 	// The FD index hasn't changed, so we add the select info to the table.
629 
630 	info->next = context->select_infos[fd];
631 	context->select_infos[fd] = info;
632 
633 	// As long as the info is in the list, we keep a reference to the sync
634 	// object.
635 	atomic_add(&info->sync->ref_count, 1);
636 
637 	// Finally release our open reference. It is safe just to decrement,
638 	// since as long as the descriptor is associated with the slot,
639 	// someone else still has it open.
640 	atomic_add(&descriptor->open_count, -1);
641 
642 	return B_OK;
643 }
644 
645 
646 status_t
647 deselect_fd(int32 fd, struct select_info* info, bool kernel)
648 {
649 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
650 		info->sync, info->selected_events));
651 
652 	FDGetter fdGetter;
653 		// define before the context locker, so it will be destroyed after it
654 
655 	io_context* context = get_current_io_context(kernel);
656 	MutexLocker locker(context->io_mutex);
657 
658 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
659 	if (descriptor == NULL)
660 		return B_FILE_ERROR;
661 
662 	// remove the info from the IO context
663 
664 	select_info** infoLocation = &context->select_infos[fd];
665 	while (*infoLocation != NULL && *infoLocation != info)
666 		infoLocation = &(*infoLocation)->next;
667 
668 	// If not found, someone else beat us to it.
669 	if (*infoLocation != info)
670 		return B_OK;
671 
672 	*infoLocation = info->next;
673 
674 	locker.Unlock();
675 
676 	// deselect the selected events
677 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
678 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
679 		for (uint16 event = 1; event < 16; event++) {
680 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
681 				descriptor->ops->fd_deselect(descriptor, event,
682 					(selectsync*)info);
683 			}
684 		}
685 	}
686 
687 	put_select_sync(info->sync);
688 
689 	return B_OK;
690 }
691 
692 
693 /*!	This function checks if the specified fd is valid in the current
694 	context. It can be used for a quick check; the fd is not locked
695 	so it could become invalid immediately after this check.
696 */
697 bool
698 fd_is_valid(int fd, bool kernel)
699 {
700 	struct file_descriptor* descriptor
701 		= get_fd(get_current_io_context(kernel), fd);
702 	if (descriptor == NULL)
703 		return false;
704 
705 	put_fd(descriptor);
706 	return true;
707 }
708 
709 
710 struct vnode*
711 fd_vnode(struct file_descriptor* descriptor)
712 {
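	// only these descriptor types are backed by a vnode; for all other types
	// there is no vnode to return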
713 	switch (descriptor->type) {
714 		case FDTYPE_FILE:
715 		case FDTYPE_DIR:
716 		case FDTYPE_ATTR_DIR:
717 		case FDTYPE_ATTR:
718 			return descriptor->u.vnode;
719 	}
720 
721 	return NULL;
722 }
723 
724 
725 static status_t
726 common_close(int fd, bool kernel)
727 {
728 	return close_fd_index(get_current_io_context(kernel), fd);
729 }
730 
731 
732 static ssize_t
733 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
734 {
735 	if (pos < -1)
736 		return B_BAD_VALUE;
737 
738 	FDGetter fdGetter;
739 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
740 	if (!descriptor)
741 		return B_FILE_ERROR;
742 
743 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
744 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
745 		return B_FILE_ERROR;
746 	}
747 
748 	bool movePosition = false;
749 	if (pos == -1) {
750 		pos = descriptor->pos;
751 		movePosition = true;
752 	}
753 
754 	if (write ? descriptor->ops->fd_write == NULL
755 			: descriptor->ops->fd_read == NULL) {
756 		return B_BAD_VALUE;
757 	}
758 
759 	if (length == 0)
760 		return 0;
761 
762 	if (!IS_USER_ADDRESS(buffer))
763 		return B_BAD_ADDRESS;
764 
765 	SyscallRestartWrapper<status_t> status;
766 
767 	if (write)
768 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
769 	else
770 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
771 
772 	if (status != B_OK)
773 		return status;
774 
775 	if (movePosition) {
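		// an O_APPEND write goes to the end of the file, so the new position
		// has to be obtained via fd_seek()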
776 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
777 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos + length;
778 	}
779 
780 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
781 }
782 
783 
784 static ssize_t
785 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
786 	bool write)
787 {
788 	if (!IS_USER_ADDRESS(userVecs))
789 		return B_BAD_ADDRESS;
790 
791 	if (pos < -1)
792 		return B_BAD_VALUE;
793 
794 	// prevent integer overflow exploit in malloc()
795 	if (count > IOV_MAX)
796 		return B_BAD_VALUE;
797 
798 	FDGetter fdGetter;
799 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
800 	if (!descriptor)
801 		return B_FILE_ERROR;
802 
803 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
804 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
805 		return B_FILE_ERROR;
806 	}
807 
808 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
809 	if (vecs == NULL)
810 		return B_NO_MEMORY;
811 	MemoryDeleter _(vecs);
812 
813 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
814 		return B_BAD_ADDRESS;
815 
816 	bool movePosition = false;
817 	if (pos == -1) {
818 		pos = descriptor->pos;
819 		movePosition = true;
820 	}
821 
822 	if (write ? descriptor->ops->fd_write == NULL
823 			: descriptor->ops->fd_read == NULL) {
824 		return B_BAD_VALUE;
825 	}
826 
827 	SyscallRestartWrapper<status_t> status;
828 
829 	ssize_t bytesTransferred = 0;
830 	for (uint32 i = 0; i < count; i++) {
831 		if (vecs[i].iov_base == NULL)
832 			continue;
833 		if (!IS_USER_ADDRESS(vecs[i].iov_base)) {
834 			status = B_BAD_ADDRESS;
835 			if (bytesTransferred == 0)
836 				return status;
837 			break;
838 		}
839 
840 		size_t length = vecs[i].iov_len;
841 		if (write) {
842 			status = descriptor->ops->fd_write(descriptor, pos,
843 				vecs[i].iov_base, &length);
844 		} else {
845 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
846 				&length);
847 		}
848 
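		// if something was transferred already, report the partial count
		// instead of the error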
849 		if (status != B_OK) {
850 			if (bytesTransferred == 0)
851 				return status;
852 			status = B_OK;
853 			break;
854 		}
855 
856 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
857 			bytesTransferred = SSIZE_MAX;
858 		else
859 			bytesTransferred += (ssize_t)length;
860 
861 		pos += length;
862 
863 		if (length < vecs[i].iov_len)
864 			break;
865 	}
866 
867 	if (movePosition) {
868 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
869 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos;
870 	}
871 
872 	return bytesTransferred;
873 }
874 
875 
876 status_t
877 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
878 {
879 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
880 
881 	return fd_ioctl(false, fd, op, buffer, length);
882 }
883 
884 
885 //	#pragma mark - User syscalls
886 
887 
888 ssize_t
889 _user_read(int fd, off_t pos, void* buffer, size_t length)
890 {
891 	return common_user_io(fd, pos, buffer, length, false);
892 }
893 
894 
895 ssize_t
896 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
897 {
898 	return common_user_vector_io(fd, pos, userVecs, count, false);
899 }
900 
901 
902 ssize_t
903 _user_write(int fd, off_t pos, const void* buffer, size_t length)
904 {
905 	return common_user_io(fd, pos, (void*)buffer, length, true);
906 }
907 
908 
909 ssize_t
910 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
911 {
912 	return common_user_vector_io(fd, pos, userVecs, count, true);
913 }
914 
915 
916 off_t
917 _user_seek(int fd, off_t pos, int seekType)
918 {
919 	syscall_64_bit_return_value();
920 
921 	struct file_descriptor* descriptor;
922 
923 	descriptor = get_fd(get_current_io_context(false), fd);
924 	if (!descriptor)
925 		return B_FILE_ERROR;
926 
927 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
928 
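	// descriptors without a seek hook are not seekable; report ESPIPE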
929 	if (descriptor->ops->fd_seek)
930 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
931 	else
932 		pos = ESPIPE;
933 
934 	put_fd(descriptor);
935 	return pos;
936 }
937 
938 
939 status_t
940 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
941 {
942 	TRACE(("user_ioctl: fd %d\n", fd));
943 
944 	// "buffer" is not always a pointer depending on "op", so we cannot
945 	// check that it is a userland buffer here. Instead we check that
946 	// it is at least not within the bounds of kernel memory; in the
947 	// cases where it is a numeric constant, it is usually a low value.
948 	if (IS_KERNEL_ADDRESS(buffer))
949 		return B_BAD_ADDRESS;
950 
951 	SyscallRestartWrapper<status_t> status;
952 
953 	return status = fd_ioctl(false, fd, op, buffer, length);
954 }
955 
956 
957 ssize_t
958 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
959 	uint32 maxCount)
960 {
961 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
962 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
963 
964 	if (maxCount == 0)
965 		return 0;
966 
967 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
968 		return B_BAD_ADDRESS;
969 
970 	// get I/O context and FD
971 	io_context* ioContext = get_current_io_context(false);
972 	FDGetter fdGetter;
973 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
974 	if (descriptor == NULL)
975 		return B_FILE_ERROR;
976 
977 	if (descriptor->ops->fd_read_dir == NULL)
978 		return B_UNSUPPORTED;
979 
980 	// restrict buffer size and allocate a heap buffer
981 	if (bufferSize > kMaxReadDirBufferSize)
982 		bufferSize = kMaxReadDirBufferSize;
983 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
984 	if (buffer == NULL)
985 		return B_NO_MEMORY;
986 	MemoryDeleter bufferDeleter(buffer);
987 
988 	// read the directory
989 	uint32 count = maxCount;
990 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
991 		buffer, bufferSize, &count);
992 	if (status != B_OK)
993 		return status;
994 
995 	ASSERT(count <= maxCount);
996 
997 	// copy the buffer back -- determine the total buffer size first
998 	size_t sizeToCopy = 0;
999 	BytePointer<struct dirent> entry = buffer;
1000 	for (uint32 i = 0; i < count; i++) {
1001 		size_t length = entry->d_reclen;
1002 		sizeToCopy += length;
1003 		entry += length;
1004 	}
1005 
1006 	ASSERT(sizeToCopy <= bufferSize);
1007 
1008 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
1009 		return B_BAD_ADDRESS;
1010 
1011 	return count;
1012 }
1013 
1014 
1015 status_t
1016 _user_rewind_dir(int fd)
1017 {
1018 	struct file_descriptor* descriptor;
1019 	status_t status;
1020 
1021 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
1022 
1023 	descriptor = get_fd(get_current_io_context(false), fd);
1024 	if (descriptor == NULL)
1025 		return B_FILE_ERROR;
1026 
1027 	if (descriptor->ops->fd_rewind_dir)
1028 		status = descriptor->ops->fd_rewind_dir(descriptor);
1029 	else
1030 		status = B_UNSUPPORTED;
1031 
1032 	put_fd(descriptor);
1033 	return status;
1034 }
1035 
1036 
1037 status_t
1038 _user_close(int fd)
1039 {
1040 	return common_close(fd, false);
1041 }
1042 
1043 
1044 int
1045 _user_dup(int fd)
1046 {
1047 	return dup_fd(fd, false);
1048 }
1049 
1050 
1051 int
1052 _user_dup2(int ofd, int nfd)
1053 {
1054 	return dup2_fd(ofd, nfd, false);
1055 }
1056 
1057 
1058 //	#pragma mark - Kernel calls
1059 
1060 
1061 ssize_t
1062 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1063 {
1064 	if (pos < -1)
1065 		return B_BAD_VALUE;
1066 
1067 	FDGetter fdGetter;
1068 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1069 
1070 	if (!descriptor)
1071 		return B_FILE_ERROR;
1072 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1073 		return B_FILE_ERROR;
1074 
1075 	bool movePosition = false;
1076 	if (pos == -1) {
1077 		pos = descriptor->pos;
1078 		movePosition = true;
1079 	}
1080 
1081 	SyscallFlagUnsetter _;
1082 
1083 	if (descriptor->ops->fd_read == NULL)
1084 		return B_BAD_VALUE;
1085 
1086 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1087 		&length);
1088 	if (bytesRead >= B_OK) {
1089 		if (length > SSIZE_MAX)
1090 			bytesRead = SSIZE_MAX;
1091 		else
1092 			bytesRead = (ssize_t)length;
1093 
1094 		if (movePosition)
1095 			descriptor->pos = pos + length;
1096 	}
1097 
1098 	return bytesRead;
1099 }
1100 
1101 
1102 ssize_t
1103 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1104 {
1105 	bool movePosition = false;
1106 	status_t status;
1107 	uint32 i;
1108 
1109 	if (pos < -1)
1110 		return B_BAD_VALUE;
1111 
1112 	FDGetter fdGetter;
1113 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1114 
1115 	if (!descriptor)
1116 		return B_FILE_ERROR;
1117 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1118 		return B_FILE_ERROR;
1119 
1120 	if (pos == -1) {
1121 		pos = descriptor->pos;
1122 		movePosition = true;
1123 	}
1124 
1125 	if (descriptor->ops->fd_read == NULL)
1126 		return B_BAD_VALUE;
1127 
1128 	SyscallFlagUnsetter _;
1129 
1130 	ssize_t bytesRead = 0;
1131 
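	// unlike in the userland path, the vectors are kernel memory and are used
	// in place without copying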
1132 	for (i = 0; i < count; i++) {
1133 		size_t length = vecs[i].iov_len;
1134 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1135 			&length);
1136 		if (status != B_OK) {
1137 			bytesRead = status;
1138 			break;
1139 		}
1140 
1141 		if ((uint64)bytesRead + length > SSIZE_MAX)
1142 			bytesRead = SSIZE_MAX;
1143 		else
1144 			bytesRead += (ssize_t)length;
1145 
1146 		pos += vecs[i].iov_len;
1147 	}
1148 
1149 	if (movePosition)
1150 		descriptor->pos = pos;
1151 
1152 	return bytesRead;
1153 }
1154 
1155 
1156 ssize_t
1157 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1158 {
1159 	if (pos < -1)
1160 		return B_BAD_VALUE;
1161 
1162 	FDGetter fdGetter;
1163 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1164 
1165 	if (descriptor == NULL)
1166 		return B_FILE_ERROR;
1167 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1168 		return B_FILE_ERROR;
1169 
1170 	bool movePosition = false;
1171 	if (pos == -1) {
1172 		pos = descriptor->pos;
1173 		movePosition = true;
1174 	}
1175 
1176 	if (descriptor->ops->fd_write == NULL)
1177 		return B_BAD_VALUE;
1178 
1179 	SyscallFlagUnsetter _;
1180 
1181 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1182 		&length);
1183 	if (bytesWritten >= B_OK) {
1184 		if (length > SSIZE_MAX)
1185 			bytesWritten = SSIZE_MAX;
1186 		else
1187 			bytesWritten = (ssize_t)length;
1188 
1189 		if (movePosition)
1190 			descriptor->pos = pos + length;
1191 	}
1192 
1193 	return bytesWritten;
1194 }
1195 
1196 
1197 ssize_t
1198 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1199 {
1200 	bool movePosition = false;
1201 	status_t status;
1202 	uint32 i;
1203 
1204 	if (pos < -1)
1205 		return B_BAD_VALUE;
1206 
1207 	FDGetter fdGetter;
1208 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1209 
1210 	if (!descriptor)
1211 		return B_FILE_ERROR;
1212 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1213 		return B_FILE_ERROR;
1214 
1215 	if (pos == -1) {
1216 		pos = descriptor->pos;
1217 		movePosition = true;
1218 	}
1219 
1220 	if (descriptor->ops->fd_write == NULL)
1221 		return B_BAD_VALUE;
1222 
1223 	SyscallFlagUnsetter _;
1224 
1225 	ssize_t bytesWritten = 0;
1226 
1227 	for (i = 0; i < count; i++) {
1228 		size_t length = vecs[i].iov_len;
1229 		status = descriptor->ops->fd_write(descriptor, pos,
1230 			vecs[i].iov_base, &length);
1231 		if (status != B_OK) {
1232 			bytesWritten = status;
1233 			break;
1234 		}
1235 
1236 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1237 			bytesWritten = SSIZE_MAX;
1238 		else
1239 			bytesWritten += (ssize_t)length;
1240 
1241 		pos += vecs[i].iov_len;
1242 	}
1243 
1244 	if (movePosition)
1245 		descriptor->pos = pos;
1246 
1247 	return bytesWritten;
1248 }
1249 
1250 
1251 off_t
1252 _kern_seek(int fd, off_t pos, int seekType)
1253 {
1254 	struct file_descriptor* descriptor;
1255 
1256 	descriptor = get_fd(get_current_io_context(true), fd);
1257 	if (!descriptor)
1258 		return B_FILE_ERROR;
1259 
1260 	if (descriptor->ops->fd_seek)
1261 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1262 	else
1263 		pos = ESPIPE;
1264 
1265 	put_fd(descriptor);
1266 	return pos;
1267 }
1268 
1269 
1270 status_t
1271 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1272 {
1273 	TRACE(("kern_ioctl: fd %d\n", fd));
1274 
1275 	SyscallFlagUnsetter _;
1276 
1277 	return fd_ioctl(true, fd, op, buffer, length);
1278 }
1279 
1280 
1281 ssize_t
1282 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1283 	uint32 maxCount)
1284 {
1285 	struct file_descriptor* descriptor;
1286 	ssize_t retval;
1287 
1288 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1289 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1290 
1291 	struct io_context* ioContext = get_current_io_context(true);
1292 	descriptor = get_fd(ioContext, fd);
1293 	if (descriptor == NULL)
1294 		return B_FILE_ERROR;
1295 
1296 	if (descriptor->ops->fd_read_dir) {
1297 		uint32 count = maxCount;
1298 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1299 			bufferSize, &count);
1300 		if (retval >= 0)
1301 			retval = count;
1302 	} else
1303 		retval = B_UNSUPPORTED;
1304 
1305 	put_fd(descriptor);
1306 	return retval;
1307 }
1308 
1309 
1310 status_t
1311 _kern_rewind_dir(int fd)
1312 {
1313 	struct file_descriptor* descriptor;
1314 	status_t status;
1315 
1316 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1317 
1318 	descriptor = get_fd(get_current_io_context(true), fd);
1319 	if (descriptor == NULL)
1320 		return B_FILE_ERROR;
1321 
1322 	if (descriptor->ops->fd_rewind_dir)
1323 		status = descriptor->ops->fd_rewind_dir(descriptor);
1324 	else
1325 		status = B_UNSUPPORTED;
1326 
1327 	put_fd(descriptor);
1328 	return status;
1329 }
1330 
1331 
1332 status_t
1333 _kern_close(int fd)
1334 {
1335 	return common_close(fd, true);
1336 }
1337 
1338 
1339 int
1340 _kern_dup(int fd)
1341 {
1342 	return dup_fd(fd, true);
1343 }
1344 
1345 
1346 int
1347 _kern_dup2(int ofd, int nfd)
1348 {
1349 	return dup2_fd(ofd, nfd, true);
1350 }
1351 
1352